author     Linus Torvalds <torvalds@linux-foundation.org>   2008-08-03 13:50:44 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2008-08-03 13:50:44 -0400
commit     8f616cd5249e03c9e1b371623d85e76d4b86bbc1 (patch)
tree       94cbd990ac45743ac75203a7e1bd6a8fb63c5bc2
parent     7e31aa11fc672bbe0dd0da59513c9efe3809ced7 (diff)
parent     7d55992d60caa390460bad1a974eb2b3c11538f4 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: remove write-only variables from ext4_ordered_write_end
  ext4: unexport jbd2_journal_update_superblock
  ext4: Cleanup whitespace and other miscellaneous style issues
  ext4: improve ext4_fill_flex_info() a bit
  ext4: Cleanup the block reservation code path
  ext4: don't assume extents can't cross block groups when truncating
  ext4: Fix lack of credits BUG() when deleting a badly fragmented inode
  ext4: Fix ext4_ext_journal_restart()
  ext4: fix ext4_da_write_begin error path
  jbd2: don't abort if flushing file data failed
  ext4: don't read inode block if the buffer has a write error
  ext4: Don't allow lg prealloc list to be grow large.
  ext4: Convert the usage of NR_CPUS to nr_cpu_ids.
  ext4: Improve error handling in mballoc
  ext4: lock block groups when initializing
  ext4: sync up block and inode bitmap reading functions
  ext4: Allow read/only mounts with corrupted block group checksums
  ext4: Fix data corruption when writing to prealloc area
-rw-r--r--  fs/ext4/acl.c      | 188
-rw-r--r--  fs/ext4/balloc.c   |  11
-rw-r--r--  fs/ext4/ext4.h     |   1
-rw-r--r--  fs/ext4/extents.c  |  55
-rw-r--r--  fs/ext4/ialloc.c   |  58
-rw-r--r--  fs/ext4/inode.c    | 164
-rw-r--r--  fs/ext4/mballoc.c  | 254
-rw-r--r--  fs/ext4/mballoc.h  |  10
-rw-r--r--  fs/ext4/resize.c   |  79
-rw-r--r--  fs/ext4/super.c    | 316
-rw-r--r--  fs/ext4/xattr.c    |   2
-rw-r--r--  fs/jbd2/commit.c   |  24
-rw-r--r--  fs/jbd2/journal.c  |   1
13 files changed, 729 insertions, 434 deletions
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index c7d04e165446..694ed6fadcc8 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -40,34 +40,35 @@ ext4_acl_from_disk(const void *value, size_t size)
40 acl = posix_acl_alloc(count, GFP_NOFS); 40 acl = posix_acl_alloc(count, GFP_NOFS);
41 if (!acl) 41 if (!acl)
42 return ERR_PTR(-ENOMEM); 42 return ERR_PTR(-ENOMEM);
43 for (n=0; n < count; n++) { 43 for (n = 0; n < count; n++) {
44 ext4_acl_entry *entry = 44 ext4_acl_entry *entry =
45 (ext4_acl_entry *)value; 45 (ext4_acl_entry *)value;
46 if ((char *)value + sizeof(ext4_acl_entry_short) > end) 46 if ((char *)value + sizeof(ext4_acl_entry_short) > end)
47 goto fail; 47 goto fail;
48 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); 48 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
49 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); 49 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
50 switch(acl->a_entries[n].e_tag) { 50
51 case ACL_USER_OBJ: 51 switch (acl->a_entries[n].e_tag) {
52 case ACL_GROUP_OBJ: 52 case ACL_USER_OBJ:
53 case ACL_MASK: 53 case ACL_GROUP_OBJ:
54 case ACL_OTHER: 54 case ACL_MASK:
55 value = (char *)value + 55 case ACL_OTHER:
56 sizeof(ext4_acl_entry_short); 56 value = (char *)value +
57 acl->a_entries[n].e_id = ACL_UNDEFINED_ID; 57 sizeof(ext4_acl_entry_short);
58 break; 58 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
59 59 break;
60 case ACL_USER: 60
61 case ACL_GROUP: 61 case ACL_USER:
62 value = (char *)value + sizeof(ext4_acl_entry); 62 case ACL_GROUP:
63 if ((char *)value > end) 63 value = (char *)value + sizeof(ext4_acl_entry);
64 goto fail; 64 if ((char *)value > end)
65 acl->a_entries[n].e_id =
66 le32_to_cpu(entry->e_id);
67 break;
68
69 default:
70 goto fail; 65 goto fail;
66 acl->a_entries[n].e_id =
67 le32_to_cpu(entry->e_id);
68 break;
69
70 default:
71 goto fail;
71 } 72 }
72 } 73 }
73 if (value != end) 74 if (value != end)
@@ -96,27 +97,26 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
96 return ERR_PTR(-ENOMEM); 97 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); 98 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
98 e = (char *)ext_acl + sizeof(ext4_acl_header); 99 e = (char *)ext_acl + sizeof(ext4_acl_header);
99 for (n=0; n < acl->a_count; n++) { 100 for (n = 0; n < acl->a_count; n++) {
100 ext4_acl_entry *entry = (ext4_acl_entry *)e; 101 ext4_acl_entry *entry = (ext4_acl_entry *)e;
101 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 102 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
102 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 103 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
103 switch(acl->a_entries[n].e_tag) { 104 switch (acl->a_entries[n].e_tag) {
104 case ACL_USER: 105 case ACL_USER:
105 case ACL_GROUP: 106 case ACL_GROUP:
106 entry->e_id = 107 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
107 cpu_to_le32(acl->a_entries[n].e_id); 108 e += sizeof(ext4_acl_entry);
108 e += sizeof(ext4_acl_entry); 109 break;
109 break; 110
110 111 case ACL_USER_OBJ:
111 case ACL_USER_OBJ: 112 case ACL_GROUP_OBJ:
112 case ACL_GROUP_OBJ: 113 case ACL_MASK:
113 case ACL_MASK: 114 case ACL_OTHER:
114 case ACL_OTHER: 115 e += sizeof(ext4_acl_entry_short);
115 e += sizeof(ext4_acl_entry_short); 116 break;
116 break; 117
117 118 default:
118 default: 119 goto fail;
119 goto fail;
120 } 120 }
121 } 121 }
122 return (char *)ext_acl; 122 return (char *)ext_acl;
@@ -167,23 +167,23 @@ ext4_get_acl(struct inode *inode, int type)
167 if (!test_opt(inode->i_sb, POSIX_ACL)) 167 if (!test_opt(inode->i_sb, POSIX_ACL))
168 return NULL; 168 return NULL;
169 169
170 switch(type) { 170 switch (type) {
171 case ACL_TYPE_ACCESS: 171 case ACL_TYPE_ACCESS:
172 acl = ext4_iget_acl(inode, &ei->i_acl); 172 acl = ext4_iget_acl(inode, &ei->i_acl);
173 if (acl != EXT4_ACL_NOT_CACHED) 173 if (acl != EXT4_ACL_NOT_CACHED)
174 return acl; 174 return acl;
175 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 175 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
176 break; 176 break;
177 177
178 case ACL_TYPE_DEFAULT: 178 case ACL_TYPE_DEFAULT:
179 acl = ext4_iget_acl(inode, &ei->i_default_acl); 179 acl = ext4_iget_acl(inode, &ei->i_default_acl);
180 if (acl != EXT4_ACL_NOT_CACHED) 180 if (acl != EXT4_ACL_NOT_CACHED)
181 return acl; 181 return acl;
182 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; 182 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
183 break; 183 break;
184 184
185 default: 185 default:
186 return ERR_PTR(-EINVAL); 186 return ERR_PTR(-EINVAL);
187 } 187 }
188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 189 if (retval > 0) {
@@ -201,14 +201,14 @@ ext4_get_acl(struct inode *inode, int type)
201 kfree(value); 201 kfree(value);
202 202
203 if (!IS_ERR(acl)) { 203 if (!IS_ERR(acl)) {
204 switch(type) { 204 switch (type) {
205 case ACL_TYPE_ACCESS: 205 case ACL_TYPE_ACCESS:
206 ext4_iset_acl(inode, &ei->i_acl, acl); 206 ext4_iset_acl(inode, &ei->i_acl, acl);
207 break; 207 break;
208 208
209 case ACL_TYPE_DEFAULT: 209 case ACL_TYPE_DEFAULT:
210 ext4_iset_acl(inode, &ei->i_default_acl, acl); 210 ext4_iset_acl(inode, &ei->i_default_acl, acl);
211 break; 211 break;
212 } 212 }
213 } 213 }
214 return acl; 214 return acl;
@@ -232,31 +232,31 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
232 if (S_ISLNK(inode->i_mode)) 232 if (S_ISLNK(inode->i_mode))
233 return -EOPNOTSUPP; 233 return -EOPNOTSUPP;
234 234
235 switch(type) { 235 switch (type) {
236 case ACL_TYPE_ACCESS: 236 case ACL_TYPE_ACCESS:
237 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 237 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
238 if (acl) { 238 if (acl) {
239 mode_t mode = inode->i_mode; 239 mode_t mode = inode->i_mode;
240 error = posix_acl_equiv_mode(acl, &mode); 240 error = posix_acl_equiv_mode(acl, &mode);
241 if (error < 0) 241 if (error < 0)
242 return error; 242 return error;
243 else { 243 else {
244 inode->i_mode = mode; 244 inode->i_mode = mode;
245 ext4_mark_inode_dirty(handle, inode); 245 ext4_mark_inode_dirty(handle, inode);
246 if (error == 0) 246 if (error == 0)
247 acl = NULL; 247 acl = NULL;
248 }
249 } 248 }
250 break; 249 }
250 break;
251 251
252 case ACL_TYPE_DEFAULT: 252 case ACL_TYPE_DEFAULT:
253 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; 253 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
254 if (!S_ISDIR(inode->i_mode)) 254 if (!S_ISDIR(inode->i_mode))
255 return acl ? -EACCES : 0; 255 return acl ? -EACCES : 0;
256 break; 256 break;
257 257
258 default: 258 default:
259 return -EINVAL; 259 return -EINVAL;
260 } 260 }
261 if (acl) { 261 if (acl) {
262 value = ext4_acl_to_disk(acl, &size); 262 value = ext4_acl_to_disk(acl, &size);
@@ -269,14 +269,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
269 269
270 kfree(value); 270 kfree(value);
271 if (!error) { 271 if (!error) {
272 switch(type) { 272 switch (type) {
273 case ACL_TYPE_ACCESS: 273 case ACL_TYPE_ACCESS:
274 ext4_iset_acl(inode, &ei->i_acl, acl); 274 ext4_iset_acl(inode, &ei->i_acl, acl);
275 break; 275 break;
276 276
277 case ACL_TYPE_DEFAULT: 277 case ACL_TYPE_DEFAULT:
278 ext4_iset_acl(inode, &ei->i_default_acl, acl); 278 ext4_iset_acl(inode, &ei->i_default_acl, acl);
279 break; 279 break;
280 } 280 }
281 } 281 }
282 return error; 282 return error;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 495ab21b9832..1ae5004e93fc 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
314 if (unlikely(!bh)) { 314 if (unlikely(!bh)) {
315 ext4_error(sb, __func__, 315 ext4_error(sb, __func__,
316 "Cannot read block bitmap - " 316 "Cannot read block bitmap - "
317 "block_group = %d, block_bitmap = %llu", 317 "block_group = %lu, block_bitmap = %llu",
318 (int)block_group, (unsigned long long)bitmap_blk); 318 block_group, bitmap_blk);
319 return NULL; 319 return NULL;
320 } 320 }
321 if (bh_uptodate_or_lock(bh)) 321 if (bh_uptodate_or_lock(bh))
322 return bh; 322 return bh;
323 323
324 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
324 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 325 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
325 ext4_init_block_bitmap(sb, bh, block_group, desc); 326 ext4_init_block_bitmap(sb, bh, block_group, desc);
326 set_buffer_uptodate(bh); 327 set_buffer_uptodate(bh);
327 unlock_buffer(bh); 328 unlock_buffer(bh);
329 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
328 return bh; 330 return bh;
329 } 331 }
332 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
330 if (bh_submit_read(bh) < 0) { 333 if (bh_submit_read(bh) < 0) {
331 put_bh(bh); 334 put_bh(bh);
332 ext4_error(sb, __func__, 335 ext4_error(sb, __func__,
333 "Cannot read block bitmap - " 336 "Cannot read block bitmap - "
334 "block_group = %d, block_bitmap = %llu", 337 "block_group = %lu, block_bitmap = %llu",
335 (int)block_group, (unsigned long long)bitmap_blk); 338 block_group, bitmap_blk);
336 return NULL; 339 return NULL;
337 } 340 }
338 ext4_valid_block_bitmap(sb, desc, block_group, bh); 341 ext4_valid_block_bitmap(sb, desc, block_group, bh);
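The hunk above, part of the "lock block groups when initializing" work in this merge, wraps the EXT4_BG_BLOCK_UNINIT check and the bitmap setup in sb_bgl_lock(), so two readers of the same group cannot both decide to initialize the bitmap. A minimal userspace sketch of that check-then-initialize-under-lock pattern; struct group, read_bitmap() and their fields are made-up stand-ins for the kernel's group descriptor and buffer head, not ext4 code:

#include <pthread.h>
#include <stdbool.h>
#include <string.h>

/* Hypothetical stand-ins for a group descriptor and its bitmap buffer. */
struct group {
	pthread_mutex_t lock;         /* role of sb_bgl_lock() */
	bool uninit;                  /* role of the BLOCK_UNINIT flag */
	bool uptodate;                /* role of buffer_uptodate() */
	unsigned char bitmap[4096];
};

/* Produce the bitmap, initializing it at most once even under concurrency. */
static unsigned char *read_bitmap(struct group *g)
{
	if (g->uptodate)
		return g->bitmap;

	pthread_mutex_lock(&g->lock);
	if (g->uninit) {
		/* the "ext4_init_block_bitmap" step, done under the group lock */
		memset(g->bitmap, 0, sizeof(g->bitmap));
		g->uninit = false;
		g->uptodate = true;
		pthread_mutex_unlock(&g->lock);
		return g->bitmap;
	}
	pthread_mutex_unlock(&g->lock);

	/* an already-initialized bitmap would be read from disk here (omitted) */
	return NULL;
}

int main(void)
{
	struct group g = { .lock = PTHREAD_MUTEX_INITIALIZER, .uninit = true };
	return read_bitmap(&g) ? 0 : 1;
}

The only property mirrored from the patch is that the uninit test and the initialization happen under the same per-group lock; locking the buffer itself and submitting the read are left out of the sketch.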
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 303e41cf7b14..6c7924d9e358 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
1044 1044
1045 1045
1046/* inode.c */ 1046/* inode.c */
1047void ext4_da_release_space(struct inode *inode, int used, int to_free);
1048int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, 1047int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1049 struct buffer_head *bh, ext4_fsblk_t blocknr); 1048 struct buffer_head *bh, ext4_fsblk_t blocknr);
1050struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1049struct buffer_head *ext4_getblk(handle_t *, struct inode *,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 42c4c0c892ed..612c3d2c3824 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
99 if (handle->h_buffer_credits > needed) 99 if (handle->h_buffer_credits > needed)
100 return 0; 100 return 0;
101 err = ext4_journal_extend(handle, needed); 101 err = ext4_journal_extend(handle, needed);
102 if (err) 102 if (err <= 0)
103 return err; 103 return err;
104 return ext4_journal_restart(handle, needed); 104 return ext4_journal_restart(handle, needed);
105} 105}
@@ -1441,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
1441 1441
1442 /* 1442 /*
1443 * get the next allocated block if the extent in the path 1443 * get the next allocated block if the extent in the path
1444 * is before the requested block(s) 1444 * is before the requested block(s)
1445 */ 1445 */
1446 if (b2 < b1) { 1446 if (b2 < b1) {
1447 b2 = ext4_ext_next_allocated_block(path); 1447 b2 = ext4_ext_next_allocated_block(path);
@@ -1910,9 +1910,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1910 BUG_ON(b != ex_ee_block + ex_ee_len - 1); 1910 BUG_ON(b != ex_ee_block + ex_ee_len - 1);
1911 } 1911 }
1912 1912
1913 /* at present, extent can't cross block group: */ 1913 /*
1914 /* leaf + bitmap + group desc + sb + inode */ 1914 * 3 for leaf, sb, and inode plus 2 (bmap and group
1915 credits = 5; 1915 * descriptor) for each block group; assume two block
1916 * groups plus ex_ee_len/blocks_per_block_group for
1917 * the worst case
1918 */
1919 credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
1916 if (ex == EXT_FIRST_EXTENT(eh)) { 1920 if (ex == EXT_FIRST_EXTENT(eh)) {
1917 correct_index = 1; 1921 correct_index = 1;
1918 credits += (ext_depth(inode)) + 1; 1922 credits += (ext_depth(inode)) + 1;
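The rewritten comment in this hunk carries the reasoning behind the "don't assume extents can't cross block groups when truncating" entry in the merge summary: the old fixed credits = 5 charged for one bitmap and one group descriptor, while the new estimate charges two of them per block group the extent may span. A rough standalone illustration of that arithmetic only; credits_for_extent() is an invented name, not the kernel function:

#include <stdio.h>

/*
 * Worst-case journal credits for removing one extent, following the comment
 * in the hunk above: leaf + sb + inode, plus (bitmap + group descriptor) for
 * about two block groups, plus two more per additional group spanned.
 */
static unsigned int credits_for_extent(unsigned int ext_len,
                                       unsigned int blocks_per_group)
{
	return 7 + 2 * (ext_len / blocks_per_group);
}

int main(void)
{
	/* e.g. a 70000-block extent on a filesystem with 32768 blocks per group */
	printf("%u\n", credits_for_extent(70000, 32768));   /* prints 11 */
	return 0;
}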
@@ -2323,7 +2327,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2323 unsigned int newdepth; 2327 unsigned int newdepth;
2324 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ 2328 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
2325 if (allocated <= EXT4_EXT_ZERO_LEN) { 2329 if (allocated <= EXT4_EXT_ZERO_LEN) {
2326 /* Mark first half uninitialized. 2330 /*
2331 * iblock == ee_block is handled by the zerouout
2332 * at the beginning.
2333 * Mark first half uninitialized.
2327 * Mark second half initialized and zero out the 2334 * Mark second half initialized and zero out the
2328 * initialized extent 2335 * initialized extent
2329 */ 2336 */
@@ -2346,7 +2353,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2346 ex->ee_len = orig_ex.ee_len; 2353 ex->ee_len = orig_ex.ee_len;
2347 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2354 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2348 ext4_ext_dirty(handle, inode, path + depth); 2355 ext4_ext_dirty(handle, inode, path + depth);
2349 /* zeroed the full extent */ 2356 /* blocks available from iblock */
2350 return allocated; 2357 return allocated;
2351 2358
2352 } else if (err) 2359 } else if (err)
@@ -2374,6 +2381,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2374 err = PTR_ERR(path); 2381 err = PTR_ERR(path);
2375 return err; 2382 return err;
2376 } 2383 }
2384 /* get the second half extent details */
2377 ex = path[depth].p_ext; 2385 ex = path[depth].p_ext;
2378 err = ext4_ext_get_access(handle, inode, 2386 err = ext4_ext_get_access(handle, inode,
2379 path + depth); 2387 path + depth);
@@ -2403,6 +2411,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2403 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2411 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2404 ext4_ext_dirty(handle, inode, path + depth); 2412 ext4_ext_dirty(handle, inode, path + depth);
2405 /* zeroed the full extent */ 2413 /* zeroed the full extent */
2414 /* blocks available from iblock */
2406 return allocated; 2415 return allocated;
2407 2416
2408 } else if (err) 2417 } else if (err)
@@ -2418,23 +2427,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2418 */ 2427 */
2419 orig_ex.ee_len = cpu_to_le16(ee_len - 2428 orig_ex.ee_len = cpu_to_le16(ee_len -
2420 ext4_ext_get_actual_len(ex3)); 2429 ext4_ext_get_actual_len(ex3));
2421 if (newdepth != depth) { 2430 depth = newdepth;
2422 depth = newdepth; 2431 ext4_ext_drop_refs(path);
2423 ext4_ext_drop_refs(path); 2432 path = ext4_ext_find_extent(inode, iblock, path);
2424 path = ext4_ext_find_extent(inode, iblock, path); 2433 if (IS_ERR(path)) {
2425 if (IS_ERR(path)) { 2434 err = PTR_ERR(path);
2426 err = PTR_ERR(path); 2435 goto out;
2427 goto out;
2428 }
2429 eh = path[depth].p_hdr;
2430 ex = path[depth].p_ext;
2431 if (ex2 != &newex)
2432 ex2 = ex;
2433
2434 err = ext4_ext_get_access(handle, inode, path + depth);
2435 if (err)
2436 goto out;
2437 } 2436 }
2437 eh = path[depth].p_hdr;
2438 ex = path[depth].p_ext;
2439 if (ex2 != &newex)
2440 ex2 = ex;
2441
2442 err = ext4_ext_get_access(handle, inode, path + depth);
2443 if (err)
2444 goto out;
2445
2438 allocated = max_blocks; 2446 allocated = max_blocks;
2439 2447
2440 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying 2448 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
@@ -2452,6 +2460,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2452 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2460 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2453 ext4_ext_dirty(handle, inode, path + depth); 2461 ext4_ext_dirty(handle, inode, path + depth);
2454 /* zero out the first half */ 2462 /* zero out the first half */
2463 /* blocks available from iblock */
2455 return allocated; 2464 return allocated;
2456 } 2465 }
2457 } 2466 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index a92eb305344f..655e760212b8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
97 * Return buffer_head of bitmap on success or NULL. 97 * Return buffer_head of bitmap on success or NULL.
98 */ 98 */
99static struct buffer_head * 99static struct buffer_head *
100read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) 100ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
101{ 101{
102 struct ext4_group_desc *desc; 102 struct ext4_group_desc *desc;
103 struct buffer_head *bh = NULL; 103 struct buffer_head *bh = NULL;
104 ext4_fsblk_t bitmap_blk;
104 105
105 desc = ext4_get_group_desc(sb, block_group, NULL); 106 desc = ext4_get_group_desc(sb, block_group, NULL);
106 if (!desc) 107 if (!desc)
107 goto error_out; 108 return NULL;
108 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 109 bitmap_blk = ext4_inode_bitmap(sb, desc);
109 bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); 110 bh = sb_getblk(sb, bitmap_blk);
110 if (!buffer_uptodate(bh)) { 111 if (unlikely(!bh)) {
111 lock_buffer(bh); 112 ext4_error(sb, __func__,
112 if (!buffer_uptodate(bh)) { 113 "Cannot read inode bitmap - "
113 ext4_init_inode_bitmap(sb, bh, block_group, 114 "block_group = %lu, inode_bitmap = %llu",
114 desc); 115 block_group, bitmap_blk);
115 set_buffer_uptodate(bh); 116 return NULL;
116 }
117 unlock_buffer(bh);
118 }
119 } else {
120 bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
121 } 117 }
122 if (!bh) 118 if (bh_uptodate_or_lock(bh))
123 ext4_error(sb, "read_inode_bitmap", 119 return bh;
120
121 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
122 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
123 ext4_init_inode_bitmap(sb, bh, block_group, desc);
124 set_buffer_uptodate(bh);
125 unlock_buffer(bh);
126 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
127 return bh;
128 }
129 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
130 if (bh_submit_read(bh) < 0) {
131 put_bh(bh);
132 ext4_error(sb, __func__,
124 "Cannot read inode bitmap - " 133 "Cannot read inode bitmap - "
125 "block_group = %lu, inode_bitmap = %llu", 134 "block_group = %lu, inode_bitmap = %llu",
126 block_group, ext4_inode_bitmap(sb, desc)); 135 block_group, bitmap_blk);
127error_out: 136 return NULL;
137 }
128 return bh; 138 return bh;
129} 139}
130 140
@@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
200 } 210 }
201 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 211 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
202 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 212 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
203 bitmap_bh = read_inode_bitmap(sb, block_group); 213 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
204 if (!bitmap_bh) 214 if (!bitmap_bh)
205 goto error_return; 215 goto error_return;
206 216
@@ -623,7 +633,7 @@ got_group:
623 goto fail; 633 goto fail;
624 634
625 brelse(bitmap_bh); 635 brelse(bitmap_bh);
626 bitmap_bh = read_inode_bitmap(sb, group); 636 bitmap_bh = ext4_read_inode_bitmap(sb, group);
627 if (!bitmap_bh) 637 if (!bitmap_bh)
628 goto fail; 638 goto fail;
629 639
@@ -728,7 +738,7 @@ got:
728 738
729 /* When marking the block group with 739 /* When marking the block group with
730 * ~EXT4_BG_INODE_UNINIT we don't want to depend 740 * ~EXT4_BG_INODE_UNINIT we don't want to depend
731 * on the value of bg_itable_unsed even though 741 * on the value of bg_itable_unused even though
732 * mke2fs could have initialized the same for us. 742 * mke2fs could have initialized the same for us.
733 * Instead we calculated the value below 743 * Instead we calculated the value below
734 */ 744 */
@@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
891 901
892 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 902 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
893 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 903 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
894 bitmap_bh = read_inode_bitmap(sb, block_group); 904 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
895 if (!bitmap_bh) { 905 if (!bitmap_bh) {
896 ext4_warning(sb, __func__, 906 ext4_warning(sb, __func__,
897 "inode bitmap error for orphan %lu", ino); 907 "inode bitmap error for orphan %lu", ino);
@@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
969 continue; 979 continue;
970 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 980 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
971 brelse(bitmap_bh); 981 brelse(bitmap_bh);
972 bitmap_bh = read_inode_bitmap(sb, i); 982 bitmap_bh = ext4_read_inode_bitmap(sb, i);
973 if (!bitmap_bh) 983 if (!bitmap_bh)
974 continue; 984 continue;
975 985
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9843b046c235..59fbbe899acc 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -191,6 +191,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
191void ext4_delete_inode (struct inode * inode) 191void ext4_delete_inode (struct inode * inode)
192{ 192{
193 handle_t *handle; 193 handle_t *handle;
194 int err;
194 195
195 if (ext4_should_order_data(inode)) 196 if (ext4_should_order_data(inode))
196 ext4_begin_ordered_truncate(inode, 0); 197 ext4_begin_ordered_truncate(inode, 0);
@@ -199,8 +200,9 @@ void ext4_delete_inode (struct inode * inode)
199 if (is_bad_inode(inode)) 200 if (is_bad_inode(inode))
200 goto no_delete; 201 goto no_delete;
201 202
202 handle = start_transaction(inode); 203 handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
203 if (IS_ERR(handle)) { 204 if (IS_ERR(handle)) {
205 ext4_std_error(inode->i_sb, PTR_ERR(handle));
204 /* 206 /*
205 * If we're going to skip the normal cleanup, we still need to 207 * If we're going to skip the normal cleanup, we still need to
206 * make sure that the in-core orphan linked list is properly 208 * make sure that the in-core orphan linked list is properly
@@ -213,8 +215,34 @@ void ext4_delete_inode (struct inode * inode)
213 if (IS_SYNC(inode)) 215 if (IS_SYNC(inode))
214 handle->h_sync = 1; 216 handle->h_sync = 1;
215 inode->i_size = 0; 217 inode->i_size = 0;
218 err = ext4_mark_inode_dirty(handle, inode);
219 if (err) {
220 ext4_warning(inode->i_sb, __func__,
221 "couldn't mark inode dirty (err %d)", err);
222 goto stop_handle;
223 }
216 if (inode->i_blocks) 224 if (inode->i_blocks)
217 ext4_truncate(inode); 225 ext4_truncate(inode);
226
227 /*
228 * ext4_ext_truncate() doesn't reserve any slop when it
229 * restarts journal transactions; therefore there may not be
230 * enough credits left in the handle to remove the inode from
231 * the orphan list and set the dtime field.
232 */
233 if (handle->h_buffer_credits < 3) {
234 err = ext4_journal_extend(handle, 3);
235 if (err > 0)
236 err = ext4_journal_restart(handle, 3);
237 if (err != 0) {
238 ext4_warning(inode->i_sb, __func__,
239 "couldn't extend journal (err %d)", err);
240 stop_handle:
241 ext4_journal_stop(handle);
242 goto no_delete;
243 }
244 }
245
218 /* 246 /*
219 * Kill off the orphan record which ext4_truncate created. 247 * Kill off the orphan record which ext4_truncate created.
220 * AKPM: I think this can be inside the above `if'. 248 * AKPM: I think this can be inside the above `if'.
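The credit check added above leans on the jbd2 convention that ext4_journal_extend() returns 0 when the running handle was extended, a positive value when it could not be extended (so the caller should restart the transaction instead), and a negative errno on real failure; the ext4_ext_journal_restart() change earlier in this merge (err <= 0) relies on the same convention. A small sketch of just that decision flow; journal_extend()/journal_restart() are stubs, not the jbd2 API:

#include <stdio.h>

/* Stubs following the return convention described above. */
static int journal_extend(int nblocks)  { (void)nblocks; return 1; /* "cannot extend" */ }
static int journal_restart(int nblocks) { (void)nblocks; return 0; }

/* Ensure at least `needed` credits are available before continuing. */
static int ensure_credits(int have, int needed)
{
	int err;

	if (have >= needed)
		return 0;
	err = journal_extend(needed);
	if (err > 0)                    /* extend refused: restart the handle */
		err = journal_restart(needed);
	return err;                     /* 0 on success, negative errno otherwise */
}

int main(void)
{
	printf("%d\n", ensure_credits(1, 3));   /* prints 0, via the restart path */
	return 0;
}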
@@ -952,6 +980,67 @@ out:
952 return err; 980 return err;
953} 981}
954 982
983/*
984 * Calculate the number of metadata blocks need to reserve
985 * to allocate @blocks for non extent file based file
986 */
987static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
988{
989 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
990 int ind_blks, dind_blks, tind_blks;
991
992 /* number of new indirect blocks needed */
993 ind_blks = (blocks + icap - 1) / icap;
994
995 dind_blks = (ind_blks + icap - 1) / icap;
996
997 tind_blks = 1;
998
999 return ind_blks + dind_blks + tind_blks;
1000}
1001
1002/*
1003 * Calculate the number of metadata blocks need to reserve
1004 * to allocate given number of blocks
1005 */
1006static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1007{
1008 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1009 return ext4_ext_calc_metadata_amount(inode, blocks);
1010
1011 return ext4_indirect_calc_metadata_amount(inode, blocks);
1012}
1013
1014static void ext4_da_update_reserve_space(struct inode *inode, int used)
1015{
1016 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1017 int total, mdb, mdb_free;
1018
1019 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1020 /* recalculate the number of metablocks still need to be reserved */
1021 total = EXT4_I(inode)->i_reserved_data_blocks - used;
1022 mdb = ext4_calc_metadata_amount(inode, total);
1023
1024 /* figure out how many metablocks to release */
1025 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1026 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1027
1028 /* Account for allocated meta_blocks */
1029 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1030
1031 /* update fs free blocks counter for truncate case */
1032 percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
1033
1034 /* update per-inode reservations */
1035 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1036 EXT4_I(inode)->i_reserved_data_blocks -= used;
1037
1038 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1039 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1040 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1041 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1042}
1043
955/* Maximum number of blocks we map for direct IO at once. */ 1044/* Maximum number of blocks we map for direct IO at once. */
956#define DIO_MAX_BLOCKS 4096 1045#define DIO_MAX_BLOCKS 4096
957/* 1046/*
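ext4_indirect_calc_metadata_amount(), moved up in this hunk so ext4_da_update_reserve_space() can use it, is plain ceiling arithmetic over the indirect-block fan-out. The same computation as a standalone program, with icap passed in explicitly (EXT4_ADDR_PER_BLOCK in the kernel) and an invented function name:

#include <stdio.h>

/*
 * Metadata blocks to reserve when allocating `blocks` data blocks in an
 * indirect-mapped file, where icap is the number of block pointers that
 * fit in one indirect block.
 */
static int indirect_metadata_amount(int blocks, int icap)
{
	int ind_blks  = (blocks + icap - 1) / icap;     /* new indirect blocks */
	int dind_blks = (ind_blks + icap - 1) / icap;   /* double-indirect blocks */
	int tind_blks = 1;                              /* at most one triple-indirect */

	return ind_blks + dind_blks + tind_blks;
}

int main(void)
{
	/* 4 KiB blocks => 1024 pointers per indirect block */
	printf("%d\n", indirect_metadata_amount(3000, 1024));   /* 3 + 1 + 1 = 5 */
	return 0;
}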
@@ -965,10 +1054,9 @@ out:
965 1054
966 1055
967/* 1056/*
1057 * The ext4_get_blocks_wrap() function try to look up the requested blocks,
1058 * and returns if the blocks are already mapped.
968 * 1059 *
969 *
970 * ext4_ext4 get_block() wrapper function
971 * It will do a look up first, and returns if the blocks already mapped.
972 * Otherwise it takes the write lock of the i_data_sem and allocate blocks 1060 * Otherwise it takes the write lock of the i_data_sem and allocate blocks
973 * and store the allocated blocks in the result buffer head and mark it 1061 * and store the allocated blocks in the result buffer head and mark it
974 * mapped. 1062 * mapped.
@@ -1069,7 +1157,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1069 * which were deferred till now 1157 * which were deferred till now
1070 */ 1158 */
1071 if ((retval > 0) && buffer_delay(bh)) 1159 if ((retval > 0) && buffer_delay(bh))
1072 ext4_da_release_space(inode, retval, 0); 1160 ext4_da_update_reserve_space(inode, retval);
1073 } 1161 }
1074 1162
1075 up_write((&EXT4_I(inode)->i_data_sem)); 1163 up_write((&EXT4_I(inode)->i_data_sem));
@@ -1336,12 +1424,8 @@ static int ext4_ordered_write_end(struct file *file,
1336{ 1424{
1337 handle_t *handle = ext4_journal_current_handle(); 1425 handle_t *handle = ext4_journal_current_handle();
1338 struct inode *inode = mapping->host; 1426 struct inode *inode = mapping->host;
1339 unsigned from, to;
1340 int ret = 0, ret2; 1427 int ret = 0, ret2;
1341 1428
1342 from = pos & (PAGE_CACHE_SIZE - 1);
1343 to = from + len;
1344
1345 ret = ext4_jbd2_file_inode(handle, inode); 1429 ret = ext4_jbd2_file_inode(handle, inode);
1346 1430
1347 if (ret == 0) { 1431 if (ret == 0) {
@@ -1437,36 +1521,6 @@ static int ext4_journalled_write_end(struct file *file,
1437 1521
1438 return ret ? ret : copied; 1522 return ret ? ret : copied;
1439} 1523}
1440/*
1441 * Calculate the number of metadata blocks need to reserve
1442 * to allocate @blocks for non extent file based file
1443 */
1444static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
1445{
1446 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1447 int ind_blks, dind_blks, tind_blks;
1448
1449 /* number of new indirect blocks needed */
1450 ind_blks = (blocks + icap - 1) / icap;
1451
1452 dind_blks = (ind_blks + icap - 1) / icap;
1453
1454 tind_blks = 1;
1455
1456 return ind_blks + dind_blks + tind_blks;
1457}
1458
1459/*
1460 * Calculate the number of metadata blocks need to reserve
1461 * to allocate given number of blocks
1462 */
1463static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1464{
1465 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1466 return ext4_ext_calc_metadata_amount(inode, blocks);
1467
1468 return ext4_indirect_calc_metadata_amount(inode, blocks);
1469}
1470 1524
1471static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1525static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1472{ 1526{
@@ -1490,7 +1544,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1490 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1544 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1491 return -ENOSPC; 1545 return -ENOSPC;
1492 } 1546 }
1493
1494 /* reduce fs free blocks counter */ 1547 /* reduce fs free blocks counter */
1495 percpu_counter_sub(&sbi->s_freeblocks_counter, total); 1548 percpu_counter_sub(&sbi->s_freeblocks_counter, total);
1496 1549
@@ -1501,35 +1554,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1501 return 0; /* success */ 1554 return 0; /* success */
1502} 1555}
1503 1556
1504void ext4_da_release_space(struct inode *inode, int used, int to_free) 1557static void ext4_da_release_space(struct inode *inode, int to_free)
1505{ 1558{
1506 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1559 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1507 int total, mdb, mdb_free, release; 1560 int total, mdb, mdb_free, release;
1508 1561
1509 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1562 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1510 /* recalculate the number of metablocks still need to be reserved */ 1563 /* recalculate the number of metablocks still need to be reserved */
1511 total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free; 1564 total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
1512 mdb = ext4_calc_metadata_amount(inode, total); 1565 mdb = ext4_calc_metadata_amount(inode, total);
1513 1566
1514 /* figure out how many metablocks to release */ 1567 /* figure out how many metablocks to release */
1515 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1568 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1516 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1569 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1517 1570
1518 /* Account for allocated meta_blocks */
1519 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1520
1521 release = to_free + mdb_free; 1571 release = to_free + mdb_free;
1522 1572
1523 /* update fs free blocks counter for truncate case */ 1573 /* update fs free blocks counter for truncate case */
1524 percpu_counter_add(&sbi->s_freeblocks_counter, release); 1574 percpu_counter_add(&sbi->s_freeblocks_counter, release);
1525 1575
1526 /* update per-inode reservations */ 1576 /* update per-inode reservations */
1527 BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks); 1577 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
1528 EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free); 1578 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1529 1579
1530 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1580 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1531 EXT4_I(inode)->i_reserved_meta_blocks = mdb; 1581 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1532 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1533 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1582 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1534} 1583}
1535 1584
@@ -1551,7 +1600,7 @@ static void ext4_da_page_release_reservation(struct page *page,
1551 } 1600 }
1552 curr_off = next_off; 1601 curr_off = next_off;
1553 } while ((bh = bh->b_this_page) != head); 1602 } while ((bh = bh->b_this_page) != head);
1554 ext4_da_release_space(page->mapping->host, 0, to_release); 1603 ext4_da_release_space(page->mapping->host, to_release);
1555} 1604}
1556 1605
1557/* 1606/*
@@ -2280,8 +2329,11 @@ retry:
2280 } 2329 }
2281 2330
2282 page = __grab_cache_page(mapping, index); 2331 page = __grab_cache_page(mapping, index);
2283 if (!page) 2332 if (!page) {
2284 return -ENOMEM; 2333 ext4_journal_stop(handle);
2334 ret = -ENOMEM;
2335 goto out;
2336 }
2285 *pagep = page; 2337 *pagep = page;
2286 2338
2287 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 2339 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
@@ -3590,6 +3642,16 @@ static int __ext4_get_inode_loc(struct inode *inode,
3590 } 3642 }
3591 if (!buffer_uptodate(bh)) { 3643 if (!buffer_uptodate(bh)) {
3592 lock_buffer(bh); 3644 lock_buffer(bh);
3645
3646 /*
3647 * If the buffer has the write error flag, we have failed
3648 * to write out another inode in the same block. In this
3649 * case, we don't have to read the block because we may
3650 * read the old inode data successfully.
3651 */
3652 if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
3653 set_buffer_uptodate(bh);
3654
3593 if (buffer_uptodate(bh)) { 3655 if (buffer_uptodate(bh)) {
3594 /* someone brought it uptodate while we waited */ 3656 /* someone brought it uptodate while we waited */
3595 unlock_buffer(bh); 3657 unlock_buffer(bh);
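The block added above ("ext4: don't read inode block if the buffer has a write error") treats a buffer whose only problem is an earlier failed write as uptodate, so the newest in-memory copy is reused instead of re-reading older data from disk. The same decision in a tiny sketch; struct buf and need_disk_read() are hypothetical, only the flag logic is taken from the hunk:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical buffer state; the kernel keeps these as bits in b_state. */
struct buf {
	bool uptodate;
	bool write_io_error;
};

/* Decide whether a disk read is actually required before using the buffer. */
static bool need_disk_read(struct buf *b)
{
	/*
	 * A previous write of this block failed, but the in-memory contents
	 * are still the newest data available, so keep them rather than
	 * re-reading a possibly older copy from disk.
	 */
	if (b->write_io_error && !b->uptodate)
		b->uptodate = true;

	return !b->uptodate;
}

int main(void)
{
	struct buf b = { .uptodate = false, .write_io_error = true };
	printf("%s\n", need_disk_read(&b) ? "read" : "skip read");   /* skip read */
	return 0;
}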
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d141a25bbee..865e9ddb44d4 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
787 if (bh_uptodate_or_lock(bh[i])) 787 if (bh_uptodate_or_lock(bh[i]))
788 continue; 788 continue;
789 789
790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
790 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
791 ext4_init_block_bitmap(sb, bh[i], 792 ext4_init_block_bitmap(sb, bh[i],
792 first_group + i, desc); 793 first_group + i, desc);
793 set_buffer_uptodate(bh[i]); 794 set_buffer_uptodate(bh[i]);
794 unlock_buffer(bh[i]); 795 unlock_buffer(bh[i]);
796 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
795 continue; 797 continue;
796 } 798 }
799 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
797 get_bh(bh[i]); 800 get_bh(bh[i]);
798 bh[i]->b_end_io = end_buffer_read_sync; 801 bh[i]->b_end_io = end_buffer_read_sync;
799 submit_bh(READ, bh[i]); 802 submit_bh(READ, bh[i]);
@@ -2477,7 +2480,7 @@ err_freesgi:
2477int ext4_mb_init(struct super_block *sb, int needs_recovery) 2480int ext4_mb_init(struct super_block *sb, int needs_recovery)
2478{ 2481{
2479 struct ext4_sb_info *sbi = EXT4_SB(sb); 2482 struct ext4_sb_info *sbi = EXT4_SB(sb);
2480 unsigned i; 2483 unsigned i, j;
2481 unsigned offset; 2484 unsigned offset;
2482 unsigned max; 2485 unsigned max;
2483 int ret; 2486 int ret;
@@ -2537,7 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2537 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; 2540 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
2538 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2541 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2539 2542
2540 i = sizeof(struct ext4_locality_group) * NR_CPUS; 2543 i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
2541 sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); 2544 sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
2542 if (sbi->s_locality_groups == NULL) { 2545 if (sbi->s_locality_groups == NULL) {
2543 clear_opt(sbi->s_mount_opt, MBALLOC); 2546 clear_opt(sbi->s_mount_opt, MBALLOC);
@@ -2545,11 +2548,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2545 kfree(sbi->s_mb_maxs); 2548 kfree(sbi->s_mb_maxs);
2546 return -ENOMEM; 2549 return -ENOMEM;
2547 } 2550 }
2548 for (i = 0; i < NR_CPUS; i++) { 2551 for (i = 0; i < nr_cpu_ids; i++) {
2549 struct ext4_locality_group *lg; 2552 struct ext4_locality_group *lg;
2550 lg = &sbi->s_locality_groups[i]; 2553 lg = &sbi->s_locality_groups[i];
2551 mutex_init(&lg->lg_mutex); 2554 mutex_init(&lg->lg_mutex);
2552 INIT_LIST_HEAD(&lg->lg_prealloc_list); 2555 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2556 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2553 spin_lock_init(&lg->lg_prealloc_lock); 2557 spin_lock_init(&lg->lg_prealloc_lock);
2554 } 2558 }
2555 2559
@@ -3260,6 +3264,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3260 struct ext4_prealloc_space *pa) 3264 struct ext4_prealloc_space *pa)
3261{ 3265{
3262 unsigned int len = ac->ac_o_ex.fe_len; 3266 unsigned int len = ac->ac_o_ex.fe_len;
3267
3263 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, 3268 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3264 &ac->ac_b_ex.fe_group, 3269 &ac->ac_b_ex.fe_group,
3265 &ac->ac_b_ex.fe_start); 3270 &ac->ac_b_ex.fe_start);
@@ -3282,6 +3287,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3282static noinline_for_stack int 3287static noinline_for_stack int
3283ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3288ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3284{ 3289{
3290 int order, i;
3285 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3291 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3286 struct ext4_locality_group *lg; 3292 struct ext4_locality_group *lg;
3287 struct ext4_prealloc_space *pa; 3293 struct ext4_prealloc_space *pa;
@@ -3322,22 +3328,29 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3322 lg = ac->ac_lg; 3328 lg = ac->ac_lg;
3323 if (lg == NULL) 3329 if (lg == NULL)
3324 return 0; 3330 return 0;
3325 3331 order = fls(ac->ac_o_ex.fe_len) - 1;
3326 rcu_read_lock(); 3332 if (order > PREALLOC_TB_SIZE - 1)
3327 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) { 3333 /* The max size of hash table is PREALLOC_TB_SIZE */
3328 spin_lock(&pa->pa_lock); 3334 order = PREALLOC_TB_SIZE - 1;
3329 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { 3335
3330 atomic_inc(&pa->pa_count); 3336 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3331 ext4_mb_use_group_pa(ac, pa); 3337 rcu_read_lock();
3338 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3339 pa_inode_list) {
3340 spin_lock(&pa->pa_lock);
3341 if (pa->pa_deleted == 0 &&
3342 pa->pa_free >= ac->ac_o_ex.fe_len) {
3343 atomic_inc(&pa->pa_count);
3344 ext4_mb_use_group_pa(ac, pa);
3345 spin_unlock(&pa->pa_lock);
3346 ac->ac_criteria = 20;
3347 rcu_read_unlock();
3348 return 1;
3349 }
3332 spin_unlock(&pa->pa_lock); 3350 spin_unlock(&pa->pa_lock);
3333 ac->ac_criteria = 20;
3334 rcu_read_unlock();
3335 return 1;
3336 } 3351 }
3337 spin_unlock(&pa->pa_lock); 3352 rcu_read_unlock();
3338 } 3353 }
3339 rcu_read_unlock();
3340
3341 return 0; 3354 return 0;
3342} 3355}
3343 3356
@@ -3560,6 +3573,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3560 pa->pa_free = pa->pa_len; 3573 pa->pa_free = pa->pa_len;
3561 atomic_set(&pa->pa_count, 1); 3574 atomic_set(&pa->pa_count, 1);
3562 spin_lock_init(&pa->pa_lock); 3575 spin_lock_init(&pa->pa_lock);
3576 INIT_LIST_HEAD(&pa->pa_inode_list);
3563 pa->pa_deleted = 0; 3577 pa->pa_deleted = 0;
3564 pa->pa_linear = 1; 3578 pa->pa_linear = 1;
3565 3579
@@ -3580,10 +3594,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3580 list_add(&pa->pa_group_list, &grp->bb_prealloc_list); 3594 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3581 ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 3595 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3582 3596
3583 spin_lock(pa->pa_obj_lock); 3597 /*
3584 list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list); 3598 * We will later add the new pa to the right bucket
3585 spin_unlock(pa->pa_obj_lock); 3599 * after updating the pa_free in ext4_mb_release_context
3586 3600 */
3587 return 0; 3601 return 0;
3588} 3602}
3589 3603
@@ -3733,20 +3747,23 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3733 3747
3734 bitmap_bh = ext4_read_block_bitmap(sb, group); 3748 bitmap_bh = ext4_read_block_bitmap(sb, group);
3735 if (bitmap_bh == NULL) { 3749 if (bitmap_bh == NULL) {
3736 /* error handling here */ 3750 ext4_error(sb, __func__, "Error in reading block "
3737 ext4_mb_release_desc(&e4b); 3751 "bitmap for %lu\n", group);
3738 BUG_ON(bitmap_bh == NULL); 3752 return 0;
3739 } 3753 }
3740 3754
3741 err = ext4_mb_load_buddy(sb, group, &e4b); 3755 err = ext4_mb_load_buddy(sb, group, &e4b);
3742 BUG_ON(err != 0); /* error handling here */ 3756 if (err) {
3757 ext4_error(sb, __func__, "Error in loading buddy "
3758 "information for %lu\n", group);
3759 put_bh(bitmap_bh);
3760 return 0;
3761 }
3743 3762
3744 if (needed == 0) 3763 if (needed == 0)
3745 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; 3764 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
3746 3765
3747 grp = ext4_get_group_info(sb, group);
3748 INIT_LIST_HEAD(&list); 3766 INIT_LIST_HEAD(&list);
3749
3750 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3767 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3751repeat: 3768repeat:
3752 ext4_lock_group(sb, group); 3769 ext4_lock_group(sb, group);
@@ -3903,13 +3920,18 @@ repeat:
3903 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); 3920 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
3904 3921
3905 err = ext4_mb_load_buddy(sb, group, &e4b); 3922 err = ext4_mb_load_buddy(sb, group, &e4b);
3906 BUG_ON(err != 0); /* error handling here */ 3923 if (err) {
3924 ext4_error(sb, __func__, "Error in loading buddy "
3925 "information for %lu\n", group);
3926 continue;
3927 }
3907 3928
3908 bitmap_bh = ext4_read_block_bitmap(sb, group); 3929 bitmap_bh = ext4_read_block_bitmap(sb, group);
3909 if (bitmap_bh == NULL) { 3930 if (bitmap_bh == NULL) {
3910 /* error handling here */ 3931 ext4_error(sb, __func__, "Error in reading block "
3932 "bitmap for %lu\n", group);
3911 ext4_mb_release_desc(&e4b); 3933 ext4_mb_release_desc(&e4b);
3912 BUG_ON(bitmap_bh == NULL); 3934 continue;
3913 } 3935 }
3914 3936
3915 ext4_lock_group(sb, group); 3937 ext4_lock_group(sb, group);
@@ -4112,22 +4134,168 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4112 4134
4113} 4135}
4114 4136
4137static noinline_for_stack void
4138ext4_mb_discard_lg_preallocations(struct super_block *sb,
4139 struct ext4_locality_group *lg,
4140 int order, int total_entries)
4141{
4142 ext4_group_t group = 0;
4143 struct ext4_buddy e4b;
4144 struct list_head discard_list;
4145 struct ext4_prealloc_space *pa, *tmp;
4146 struct ext4_allocation_context *ac;
4147
4148 mb_debug("discard locality group preallocation\n");
4149
4150 INIT_LIST_HEAD(&discard_list);
4151 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4152
4153 spin_lock(&lg->lg_prealloc_lock);
4154 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4155 pa_inode_list) {
4156 spin_lock(&pa->pa_lock);
4157 if (atomic_read(&pa->pa_count)) {
4158 /*
4159 * This is the pa that we just used
4160 * for block allocation. So don't
4161 * free that
4162 */
4163 spin_unlock(&pa->pa_lock);
4164 continue;
4165 }
4166 if (pa->pa_deleted) {
4167 spin_unlock(&pa->pa_lock);
4168 continue;
4169 }
4170 /* only lg prealloc space */
4171 BUG_ON(!pa->pa_linear);
4172
4173 /* seems this one can be freed ... */
4174 pa->pa_deleted = 1;
4175 spin_unlock(&pa->pa_lock);
4176
4177 list_del_rcu(&pa->pa_inode_list);
4178 list_add(&pa->u.pa_tmp_list, &discard_list);
4179
4180 total_entries--;
4181 if (total_entries <= 5) {
4182 /*
4183 * we want to keep only 5 entries
4184 * allowing it to grow to 8. This
4185 * mak sure we don't call discard
4186 * soon for this list.
4187 */
4188 break;
4189 }
4190 }
4191 spin_unlock(&lg->lg_prealloc_lock);
4192
4193 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4194
4195 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4196 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4197 ext4_error(sb, __func__, "Error in loading buddy "
4198 "information for %lu\n", group);
4199 continue;
4200 }
4201 ext4_lock_group(sb, group);
4202 list_del(&pa->pa_group_list);
4203 ext4_mb_release_group_pa(&e4b, pa, ac);
4204 ext4_unlock_group(sb, group);
4205
4206 ext4_mb_release_desc(&e4b);
4207 list_del(&pa->u.pa_tmp_list);
4208 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4209 }
4210 if (ac)
4211 kmem_cache_free(ext4_ac_cachep, ac);
4212}
4213
4214/*
4215 * We have incremented pa_count. So it cannot be freed at this
4216 * point. Also we hold lg_mutex. So no parallel allocation is
4217 * possible from this lg. That means pa_free cannot be updated.
4218 *
4219 * A parallel ext4_mb_discard_group_preallocations is possible.
4220 * which can cause the lg_prealloc_list to be updated.
4221 */
4222
4223static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4224{
4225 int order, added = 0, lg_prealloc_count = 1;
4226 struct super_block *sb = ac->ac_sb;
4227 struct ext4_locality_group *lg = ac->ac_lg;
4228 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4229
4230 order = fls(pa->pa_free) - 1;
4231 if (order > PREALLOC_TB_SIZE - 1)
4232 /* The max size of hash table is PREALLOC_TB_SIZE */
4233 order = PREALLOC_TB_SIZE - 1;
4234 /* Add the prealloc space to lg */
4235 rcu_read_lock();
4236 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4237 pa_inode_list) {
4238 spin_lock(&tmp_pa->pa_lock);
4239 if (tmp_pa->pa_deleted) {
4240 spin_unlock(&pa->pa_lock);
4241 continue;
4242 }
4243 if (!added && pa->pa_free < tmp_pa->pa_free) {
4244 /* Add to the tail of the previous entry */
4245 list_add_tail_rcu(&pa->pa_inode_list,
4246 &tmp_pa->pa_inode_list);
4247 added = 1;
4248 /*
4249 * we want to count the total
4250 * number of entries in the list
4251 */
4252 }
4253 spin_unlock(&tmp_pa->pa_lock);
4254 lg_prealloc_count++;
4255 }
4256 if (!added)
4257 list_add_tail_rcu(&pa->pa_inode_list,
4258 &lg->lg_prealloc_list[order]);
4259 rcu_read_unlock();
4260
4261 /* Now trim the list to be not more than 8 elements */
4262 if (lg_prealloc_count > 8) {
4263 ext4_mb_discard_lg_preallocations(sb, lg,
4264 order, lg_prealloc_count);
4265 return;
4266 }
4267 return ;
4268}
4269
4115/* 4270/*
4116 * release all resource we used in allocation 4271 * release all resource we used in allocation
4117 */ 4272 */
4118static int ext4_mb_release_context(struct ext4_allocation_context *ac) 4273static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4119{ 4274{
4120 if (ac->ac_pa) { 4275 struct ext4_prealloc_space *pa = ac->ac_pa;
4121 if (ac->ac_pa->pa_linear) { 4276 if (pa) {
4277 if (pa->pa_linear) {
4122 /* see comment in ext4_mb_use_group_pa() */ 4278 /* see comment in ext4_mb_use_group_pa() */
4123 spin_lock(&ac->ac_pa->pa_lock); 4279 spin_lock(&pa->pa_lock);
4124 ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len; 4280 pa->pa_pstart += ac->ac_b_ex.fe_len;
4125 ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len; 4281 pa->pa_lstart += ac->ac_b_ex.fe_len;
4126 ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len; 4282 pa->pa_free -= ac->ac_b_ex.fe_len;
4127 ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len; 4283 pa->pa_len -= ac->ac_b_ex.fe_len;
4128 spin_unlock(&ac->ac_pa->pa_lock); 4284 spin_unlock(&pa->pa_lock);
4285 /*
4286 * We want to add the pa to the right bucket.
4287 * Remove it from the list and while adding
4288 * make sure the list to which we are adding
4289 * doesn't grow big.
4290 */
4291 if (likely(pa->pa_free)) {
4292 spin_lock(pa->pa_obj_lock);
4293 list_del_rcu(&pa->pa_inode_list);
4294 spin_unlock(pa->pa_obj_lock);
4295 ext4_mb_add_n_trim(ac);
4296 }
4129 } 4297 }
4130 ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa); 4298 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4131 } 4299 }
4132 if (ac->ac_bitmap_page) 4300 if (ac->ac_bitmap_page)
4133 page_cache_release(ac->ac_bitmap_page); 4301 page_cache_release(ac->ac_bitmap_page);
@@ -4420,11 +4588,15 @@ do_more:
4420 count -= overflow; 4588 count -= overflow;
4421 } 4589 }
4422 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 4590 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4423 if (!bitmap_bh) 4591 if (!bitmap_bh) {
4592 err = -EIO;
4424 goto error_return; 4593 goto error_return;
4594 }
4425 gdp = ext4_get_group_desc(sb, block_group, &gd_bh); 4595 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4426 if (!gdp) 4596 if (!gdp) {
4597 err = -EIO;
4427 goto error_return; 4598 goto error_return;
4599 }
4428 4600
4429 if (in_range(ext4_block_bitmap(sb, gdp), block, count) || 4601 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4430 in_range(ext4_inode_bitmap(sb, gdp), block, count) || 4602 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index bfe6add46bcf..c7c9906c2a75 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -164,11 +164,17 @@ struct ext4_free_extent {
164 * Locality group: 164 * Locality group:
165 * we try to group all related changes together 165 * we try to group all related changes together
166 * so that writeback can flush/allocate them together as well 166 * so that writeback can flush/allocate them together as well
167 * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
168 * (512). We store prealloc space into the hash based on the pa_free blocks
169 * order value.ie, fls(pa_free)-1;
167 */ 170 */
171#define PREALLOC_TB_SIZE 10
168struct ext4_locality_group { 172struct ext4_locality_group {
169 /* for allocator */ 173 /* for allocator */
170 struct mutex lg_mutex; /* to serialize allocates */ 174 /* to serialize allocates */
171 struct list_head lg_prealloc_list;/* list of preallocations */ 175 struct mutex lg_mutex;
176 /* list of preallocations */
177 struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
172 spinlock_t lg_prealloc_lock; 178 spinlock_t lg_prealloc_lock;
173}; 179};
174 180
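The new comment ties the table size to the bucketing rule: a preallocation with pa_free free blocks lands in bucket fls(pa_free) - 1, and with the default group preallocation of 512 blocks the largest index needed is fls(512) - 1 = 9, hence ten buckets. A quick check of that arithmetic; fls_portable() and lg_bucket() are illustrative helpers, with a portable bit scan in place of the kernel's fls():

#include <stdio.h>

#define PREALLOC_TB_SIZE 10

/* Portable stand-in for fls(): position of the highest set bit, 0 for 0. */
static int fls_portable(unsigned int x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

/* Bucket index used for a preallocation with `pa_free` free blocks left. */
static int lg_bucket(unsigned int pa_free)
{
	int order = fls_portable(pa_free) - 1;

	if (order > PREALLOC_TB_SIZE - 1)
		order = PREALLOC_TB_SIZE - 1;
	return order;
}

int main(void)
{
	printf("%d %d %d\n", lg_bucket(1), lg_bucket(512), lg_bucket(4096));
	/* prints: 0 9 9 */
	return 0;
}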
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f000fbe2cd93..0a9265164265 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -73,7 +73,7 @@ static int verify_group_input(struct super_block *sb,
73 "Inode bitmap not in group (block %llu)", 73 "Inode bitmap not in group (block %llu)",
74 (unsigned long long)input->inode_bitmap); 74 (unsigned long long)input->inode_bitmap);
75 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
76 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
77 ext4_warning(sb, __func__, 77 ext4_warning(sb, __func__,
78 "Inode table not in group (blocks %llu-%llu)", 78 "Inode table not in group (blocks %llu-%llu)",
79 (unsigned long long)input->inode_table, itend - 1); 79 (unsigned long long)input->inode_table, itend - 1);
@@ -104,7 +104,7 @@ static int verify_group_input(struct super_block *sb,
104 (unsigned long long)input->inode_bitmap, 104 (unsigned long long)input->inode_bitmap,
105 start, metaend - 1); 105 start, metaend - 1);
106 else if (inside(input->inode_table, start, metaend) || 106 else if (inside(input->inode_table, start, metaend) ||
107 inside(itend - 1, start, metaend)) 107 inside(itend - 1, start, metaend))
108 ext4_warning(sb, __func__, 108 ext4_warning(sb, __func__,
109 "Inode table (%llu-%llu) overlaps" 109 "Inode table (%llu-%llu) overlaps"
110 "GDT table (%llu-%llu)", 110 "GDT table (%llu-%llu)",
@@ -158,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
158 if (err) { 158 if (err) {
159 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) 159 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
160 return err; 160 return err;
161 if ((err = ext4_journal_get_write_access(handle, bh))) 161 if ((err = ext4_journal_get_write_access(handle, bh)))
162 return err; 162 return err;
163 } 163 }
164 164
165 return 0; 165 return 0;
166} 166}
@@ -416,11 +416,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", 416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
417 gdb_num); 417 gdb_num);
418 418
419 /* 419 /*
420 * If we are not using the primary superblock/GDT copy don't resize, 420 * If we are not using the primary superblock/GDT copy don't resize,
421 * because the user tools have no way of handling this. Probably a 421 * because the user tools have no way of handling this. Probably a
422 * bad time to do it anyways. 422 * bad time to do it anyways.
423 */ 423 */
424 if (EXT4_SB(sb)->s_sbh->b_blocknr != 424 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
426 ext4_warning(sb, __func__, 426 ext4_warning(sb, __func__,
@@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
507 return 0; 507 return 0;
508 508
509exit_inode: 509exit_inode:
510 //ext4_journal_release_buffer(handle, iloc.bh); 510 /* ext4_journal_release_buffer(handle, iloc.bh); */
511 brelse(iloc.bh); 511 brelse(iloc.bh);
512exit_dindj: 512exit_dindj:
513 //ext4_journal_release_buffer(handle, dind); 513 /* ext4_journal_release_buffer(handle, dind); */
514exit_primary: 514exit_primary:
515 //ext4_journal_release_buffer(handle, *primary); 515 /* ext4_journal_release_buffer(handle, *primary); */
516exit_sbh: 516exit_sbh:
517 //ext4_journal_release_buffer(handle, *primary); 517 /* ext4_journal_release_buffer(handle, *primary); */
518exit_dind: 518exit_dind:
519 brelse(dind); 519 brelse(dind);
520exit_bh: 520exit_bh:
@@ -818,12 +818,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
818 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 818 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
819 goto exit_journal; 819 goto exit_journal;
820 820
821 /* 821 /*
822 * We will only either add reserved group blocks to a backup group 822 * We will only either add reserved group blocks to a backup group
823 * or remove reserved blocks for the first group in a new group block. 823 * or remove reserved blocks for the first group in a new group block.
824 * Doing both would be mean more complex code, and sane people don't 824 * Doing both would be mean more complex code, and sane people don't
825 * use non-sparse filesystems anymore. This is already checked above. 825 * use non-sparse filesystems anymore. This is already checked above.
826 */ 826 */
827 if (gdb_off) { 827 if (gdb_off) {
828 primary = sbi->s_group_desc[gdb_num]; 828 primary = sbi->s_group_desc[gdb_num];
829 if ((err = ext4_journal_get_write_access(handle, primary))) 829 if ((err = ext4_journal_get_write_access(handle, primary)))
@@ -835,24 +835,24 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
835 } else if ((err = add_new_gdb(handle, inode, input, &primary))) 835 } else if ((err = add_new_gdb(handle, inode, input, &primary)))
836 goto exit_journal; 836 goto exit_journal;
837 837
838 /* 838 /*
839 * OK, now we've set up the new group. Time to make it active. 839 * OK, now we've set up the new group. Time to make it active.
840 * 840 *
841 * Current kernels don't lock all allocations via lock_super(), 841 * Current kernels don't lock all allocations via lock_super(),
842 * so we have to be safe wrt. concurrent accesses the group 842 * so we have to be safe wrt. concurrent accesses the group
843 * data. So we need to be careful to set all of the relevant 843 * data. So we need to be careful to set all of the relevant
844 * group descriptor data etc. *before* we enable the group. 844 * group descriptor data etc. *before* we enable the group.
845 * 845 *
846 * The key field here is sbi->s_groups_count: as long as 846 * The key field here is sbi->s_groups_count: as long as
847 * that retains its old value, nobody is going to access the new 847 * that retains its old value, nobody is going to access the new
848 * group. 848 * group.
849 * 849 *
850 * So first we update all the descriptor metadata for the new 850 * So first we update all the descriptor metadata for the new
851 * group; then we update the total disk blocks count; then we 851 * group; then we update the total disk blocks count; then we
852 * update the groups count to enable the group; then finally we 852 * update the groups count to enable the group; then finally we
853 * update the free space counts so that the system can start 853 * update the free space counts so that the system can start
854 * using the new disk blocks. 854 * using the new disk blocks.
855 */ 855 */
856 856
857 /* Update group descriptor block for new group */ 857 /* Update group descriptor block for new group */
858 gdp = (struct ext4_group_desc *)((char *)primary->b_data + 858 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
@@ -946,7 +946,8 @@ exit_put:
946 return err; 946 return err;
947} /* ext4_group_add */ 947} /* ext4_group_add */
948 948
949/* Extend the filesystem to the new number of blocks specified. This entry 949/*
950 * Extend the filesystem to the new number of blocks specified. This entry
950 * point is only used to extend the current filesystem to the end of the last 951 * point is only used to extend the current filesystem to the end of the last
951 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" 952 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
952 * for emergencies (because it has no dependencies on reserved blocks). 953 * for emergencies (because it has no dependencies on reserved blocks).
@@ -1024,7 +1025,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1024 o_blocks_count + add, add); 1025 o_blocks_count + add, add);
1025 1026
1026 /* See if the device is actually as big as what was requested */ 1027 /* See if the device is actually as big as what was requested */
1027 bh = sb_bread(sb, o_blocks_count + add -1); 1028 bh = sb_bread(sb, o_blocks_count + add - 1);
1028 if (!bh) { 1029 if (!bh) {
1029 ext4_warning(sb, __func__, 1030 ext4_warning(sb, __func__,
1030 "can't read last block, resize aborted"); 1031 "can't read last block, resize aborted");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1e69f29a8c55..d5d77958b861 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -49,20 +49,19 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
49 unsigned long journal_devnum); 49 unsigned long journal_devnum);
50static int ext4_create_journal(struct super_block *, struct ext4_super_block *, 50static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
51 unsigned int); 51 unsigned int);
52static void ext4_commit_super (struct super_block * sb, 52static void ext4_commit_super(struct super_block *sb,
53 struct ext4_super_block * es, 53 struct ext4_super_block *es, int sync);
54 int sync); 54static void ext4_mark_recovery_complete(struct super_block *sb,
55static void ext4_mark_recovery_complete(struct super_block * sb, 55 struct ext4_super_block *es);
56 struct ext4_super_block * es); 56static void ext4_clear_journal_err(struct super_block *sb,
57static void ext4_clear_journal_err(struct super_block * sb, 57 struct ext4_super_block *es);
58 struct ext4_super_block * es);
59static int ext4_sync_fs(struct super_block *sb, int wait); 58static int ext4_sync_fs(struct super_block *sb, int wait);
60static const char *ext4_decode_error(struct super_block * sb, int errno, 59static const char *ext4_decode_error(struct super_block *sb, int errno,
61 char nbuf[16]); 60 char nbuf[16]);
62static int ext4_remount (struct super_block * sb, int * flags, char * data); 61static int ext4_remount(struct super_block *sb, int *flags, char *data);
63static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf); 62static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
64static void ext4_unlockfs(struct super_block *sb); 63static void ext4_unlockfs(struct super_block *sb);
65static void ext4_write_super (struct super_block * sb); 64static void ext4_write_super(struct super_block *sb);
66static void ext4_write_super_lockfs(struct super_block *sb); 65static void ext4_write_super_lockfs(struct super_block *sb);
67 66
68 67
@@ -211,15 +210,15 @@ static void ext4_handle_error(struct super_block *sb)
211 if (sb->s_flags & MS_RDONLY) 210 if (sb->s_flags & MS_RDONLY)
212 return; 211 return;
213 212
214 if (!test_opt (sb, ERRORS_CONT)) { 213 if (!test_opt(sb, ERRORS_CONT)) {
215 journal_t *journal = EXT4_SB(sb)->s_journal; 214 journal_t *journal = EXT4_SB(sb)->s_journal;
216 215
217 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 216 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
218 if (journal) 217 if (journal)
219 jbd2_journal_abort(journal, -EIO); 218 jbd2_journal_abort(journal, -EIO);
220 } 219 }
221 if (test_opt (sb, ERRORS_RO)) { 220 if (test_opt(sb, ERRORS_RO)) {
222 printk (KERN_CRIT "Remounting filesystem read-only\n"); 221 printk(KERN_CRIT "Remounting filesystem read-only\n");
223 sb->s_flags |= MS_RDONLY; 222 sb->s_flags |= MS_RDONLY;
224 } 223 }
225 ext4_commit_super(sb, es, 1); 224 ext4_commit_super(sb, es, 1);
@@ -228,13 +227,13 @@ static void ext4_handle_error(struct super_block *sb)
228 sb->s_id); 227 sb->s_id);
229} 228}
230 229
231void ext4_error (struct super_block * sb, const char * function, 230void ext4_error(struct super_block *sb, const char *function,
232 const char * fmt, ...) 231 const char *fmt, ...)
233{ 232{
234 va_list args; 233 va_list args;
235 234
236 va_start(args, fmt); 235 va_start(args, fmt);
237 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 236 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
238 vprintk(fmt, args); 237 vprintk(fmt, args);
239 printk("\n"); 238 printk("\n");
240 va_end(args); 239 va_end(args);
@@ -242,7 +241,7 @@ void ext4_error (struct super_block * sb, const char * function,
242 ext4_handle_error(sb); 241 ext4_handle_error(sb);
243} 242}
244 243
245static const char *ext4_decode_error(struct super_block * sb, int errno, 244static const char *ext4_decode_error(struct super_block *sb, int errno,
246 char nbuf[16]) 245 char nbuf[16])
247{ 246{
248 char *errstr = NULL; 247 char *errstr = NULL;
@@ -278,8 +277,7 @@ static const char *ext4_decode_error(struct super_block * sb, int errno,
278/* __ext4_std_error decodes expected errors from journaling functions 277/* __ext4_std_error decodes expected errors from journaling functions
279 * automatically and invokes the appropriate error response. */ 278 * automatically and invokes the appropriate error response. */
280 279
281void __ext4_std_error (struct super_block * sb, const char * function, 280void __ext4_std_error(struct super_block *sb, const char *function, int errno)
282 int errno)
283{ 281{
284 char nbuf[16]; 282 char nbuf[16];
285 const char *errstr; 283 const char *errstr;
@@ -292,8 +290,8 @@ void __ext4_std_error (struct super_block * sb, const char * function,
292 return; 290 return;
293 291
294 errstr = ext4_decode_error(sb, errno, nbuf); 292 errstr = ext4_decode_error(sb, errno, nbuf);
295 printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 293 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
296 sb->s_id, function, errstr); 294 sb->s_id, function, errstr);
297 295
298 ext4_handle_error(sb); 296 ext4_handle_error(sb);
299} 297}
@@ -308,15 +306,15 @@ void __ext4_std_error (struct super_block * sb, const char * function,
308 * case we take the easy way out and panic immediately. 306 * case we take the easy way out and panic immediately.
309 */ 307 */
310 308
311void ext4_abort (struct super_block * sb, const char * function, 309void ext4_abort(struct super_block *sb, const char *function,
312 const char * fmt, ...) 310 const char *fmt, ...)
313{ 311{
314 va_list args; 312 va_list args;
315 313
316 printk (KERN_CRIT "ext4_abort called.\n"); 314 printk(KERN_CRIT "ext4_abort called.\n");
317 315
318 va_start(args, fmt); 316 va_start(args, fmt);
319 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 317 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
320 vprintk(fmt, args); 318 vprintk(fmt, args);
321 printk("\n"); 319 printk("\n");
322 va_end(args); 320 va_end(args);
@@ -334,8 +332,8 @@ void ext4_abort (struct super_block * sb, const char * function,
334 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 332 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
335} 333}
336 334
337void ext4_warning (struct super_block * sb, const char * function, 335void ext4_warning(struct super_block *sb, const char *function,
338 const char * fmt, ...) 336 const char *fmt, ...)
339{ 337{
340 va_list args; 338 va_list args;
341 339
@@ -496,7 +494,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
496 } 494 }
497} 495}
498 496
499static void ext4_put_super (struct super_block * sb) 497static void ext4_put_super(struct super_block *sb)
500{ 498{
501 struct ext4_sb_info *sbi = EXT4_SB(sb); 499 struct ext4_sb_info *sbi = EXT4_SB(sb);
502 struct ext4_super_block *es = sbi->s_es; 500 struct ext4_super_block *es = sbi->s_es;
@@ -647,7 +645,8 @@ static void ext4_clear_inode(struct inode *inode)
647 &EXT4_I(inode)->jinode); 645 &EXT4_I(inode)->jinode);
648} 646}
649 647
650static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) 648static inline void ext4_show_quota_options(struct seq_file *seq,
649 struct super_block *sb)
651{ 650{
652#if defined(CONFIG_QUOTA) 651#if defined(CONFIG_QUOTA)
653 struct ext4_sb_info *sbi = EXT4_SB(sb); 652 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -822,8 +821,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
822} 821}
823 822
824#ifdef CONFIG_QUOTA 823#ifdef CONFIG_QUOTA
825#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 824#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
826#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 825#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
827 826
828static int ext4_dquot_initialize(struct inode *inode, int type); 827static int ext4_dquot_initialize(struct inode *inode, int type);
829static int ext4_dquot_drop(struct inode *inode); 828static int ext4_dquot_drop(struct inode *inode);
@@ -991,12 +990,12 @@ static ext4_fsblk_t get_sb_block(void **data)
991 return sb_block; 990 return sb_block;
992} 991}
993 992
994static int parse_options (char *options, struct super_block *sb, 993static int parse_options(char *options, struct super_block *sb,
995 unsigned int *inum, unsigned long *journal_devnum, 994 unsigned int *inum, unsigned long *journal_devnum,
996 ext4_fsblk_t *n_blocks_count, int is_remount) 995 ext4_fsblk_t *n_blocks_count, int is_remount)
997{ 996{
998 struct ext4_sb_info *sbi = EXT4_SB(sb); 997 struct ext4_sb_info *sbi = EXT4_SB(sb);
999 char * p; 998 char *p;
1000 substring_t args[MAX_OPT_ARGS]; 999 substring_t args[MAX_OPT_ARGS];
1001 int data_opt = 0; 1000 int data_opt = 0;
1002 int option; 1001 int option;
@@ -1009,7 +1008,7 @@ static int parse_options (char *options, struct super_block *sb,
1009 if (!options) 1008 if (!options)
1010 return 1; 1009 return 1;
1011 1010
1012 while ((p = strsep (&options, ",")) != NULL) { 1011 while ((p = strsep(&options, ",")) != NULL) {
1013 int token; 1012 int token;
1014 if (!*p) 1013 if (!*p)
1015 continue; 1014 continue;
@@ -1017,16 +1016,16 @@ static int parse_options (char *options, struct super_block *sb,
1017 token = match_token(p, tokens, args); 1016 token = match_token(p, tokens, args);
1018 switch (token) { 1017 switch (token) {
1019 case Opt_bsd_df: 1018 case Opt_bsd_df:
1020 clear_opt (sbi->s_mount_opt, MINIX_DF); 1019 clear_opt(sbi->s_mount_opt, MINIX_DF);
1021 break; 1020 break;
1022 case Opt_minix_df: 1021 case Opt_minix_df:
1023 set_opt (sbi->s_mount_opt, MINIX_DF); 1022 set_opt(sbi->s_mount_opt, MINIX_DF);
1024 break; 1023 break;
1025 case Opt_grpid: 1024 case Opt_grpid:
1026 set_opt (sbi->s_mount_opt, GRPID); 1025 set_opt(sbi->s_mount_opt, GRPID);
1027 break; 1026 break;
1028 case Opt_nogrpid: 1027 case Opt_nogrpid:
1029 clear_opt (sbi->s_mount_opt, GRPID); 1028 clear_opt(sbi->s_mount_opt, GRPID);
1030 break; 1029 break;
1031 case Opt_resuid: 1030 case Opt_resuid:
1032 if (match_int(&args[0], &option)) 1031 if (match_int(&args[0], &option))
@@ -1043,41 +1042,41 @@ static int parse_options (char *options, struct super_block *sb,
1043 /* *sb_block = match_int(&args[0]); */ 1042 /* *sb_block = match_int(&args[0]); */
1044 break; 1043 break;
1045 case Opt_err_panic: 1044 case Opt_err_panic:
1046 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1045 clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1047 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1046 clear_opt(sbi->s_mount_opt, ERRORS_RO);
1048 set_opt (sbi->s_mount_opt, ERRORS_PANIC); 1047 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1049 break; 1048 break;
1050 case Opt_err_ro: 1049 case Opt_err_ro:
1051 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1050 clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1052 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1051 clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1053 set_opt (sbi->s_mount_opt, ERRORS_RO); 1052 set_opt(sbi->s_mount_opt, ERRORS_RO);
1054 break; 1053 break;
1055 case Opt_err_cont: 1054 case Opt_err_cont:
1056 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1055 clear_opt(sbi->s_mount_opt, ERRORS_RO);
1057 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1056 clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1058 set_opt (sbi->s_mount_opt, ERRORS_CONT); 1057 set_opt(sbi->s_mount_opt, ERRORS_CONT);
1059 break; 1058 break;
1060 case Opt_nouid32: 1059 case Opt_nouid32:
1061 set_opt (sbi->s_mount_opt, NO_UID32); 1060 set_opt(sbi->s_mount_opt, NO_UID32);
1062 break; 1061 break;
1063 case Opt_nocheck: 1062 case Opt_nocheck:
1064 clear_opt (sbi->s_mount_opt, CHECK); 1063 clear_opt(sbi->s_mount_opt, CHECK);
1065 break; 1064 break;
1066 case Opt_debug: 1065 case Opt_debug:
1067 set_opt (sbi->s_mount_opt, DEBUG); 1066 set_opt(sbi->s_mount_opt, DEBUG);
1068 break; 1067 break;
1069 case Opt_oldalloc: 1068 case Opt_oldalloc:
1070 set_opt (sbi->s_mount_opt, OLDALLOC); 1069 set_opt(sbi->s_mount_opt, OLDALLOC);
1071 break; 1070 break;
1072 case Opt_orlov: 1071 case Opt_orlov:
1073 clear_opt (sbi->s_mount_opt, OLDALLOC); 1072 clear_opt(sbi->s_mount_opt, OLDALLOC);
1074 break; 1073 break;
1075#ifdef CONFIG_EXT4DEV_FS_XATTR 1074#ifdef CONFIG_EXT4DEV_FS_XATTR
1076 case Opt_user_xattr: 1075 case Opt_user_xattr:
1077 set_opt (sbi->s_mount_opt, XATTR_USER); 1076 set_opt(sbi->s_mount_opt, XATTR_USER);
1078 break; 1077 break;
1079 case Opt_nouser_xattr: 1078 case Opt_nouser_xattr:
1080 clear_opt (sbi->s_mount_opt, XATTR_USER); 1079 clear_opt(sbi->s_mount_opt, XATTR_USER);
1081 break; 1080 break;
1082#else 1081#else
1083 case Opt_user_xattr: 1082 case Opt_user_xattr:
@@ -1115,7 +1114,7 @@ static int parse_options (char *options, struct super_block *sb,
1115 "journal on remount\n"); 1114 "journal on remount\n");
1116 return 0; 1115 return 0;
1117 } 1116 }
1118 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 1117 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1119 break; 1118 break;
1120 case Opt_journal_inum: 1119 case Opt_journal_inum:
1121 if (is_remount) { 1120 if (is_remount) {
@@ -1145,7 +1144,7 @@ static int parse_options (char *options, struct super_block *sb,
1145 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1144 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1146 break; 1145 break;
1147 case Opt_noload: 1146 case Opt_noload:
1148 set_opt (sbi->s_mount_opt, NOLOAD); 1147 set_opt(sbi->s_mount_opt, NOLOAD);
1149 break; 1148 break;
1150 case Opt_commit: 1149 case Opt_commit:
1151 if (match_int(&args[0], &option)) 1150 if (match_int(&args[0], &option))
@@ -1331,7 +1330,7 @@ set_qf_format:
1331 "on this filesystem, use tune2fs\n"); 1330 "on this filesystem, use tune2fs\n");
1332 return 0; 1331 return 0;
1333 } 1332 }
1334 set_opt (sbi->s_mount_opt, EXTENTS); 1333 set_opt(sbi->s_mount_opt, EXTENTS);
1335 break; 1334 break;
1336 case Opt_noextents: 1335 case Opt_noextents:
1337 /* 1336 /*
@@ -1348,7 +1347,7 @@ set_qf_format:
1348 "-o noextents options\n"); 1347 "-o noextents options\n");
1349 return 0; 1348 return 0;
1350 } 1349 }
1351 clear_opt (sbi->s_mount_opt, EXTENTS); 1350 clear_opt(sbi->s_mount_opt, EXTENTS);
1352 break; 1351 break;
1353 case Opt_i_version: 1352 case Opt_i_version:
1354 set_opt(sbi->s_mount_opt, I_VERSION); 1353 set_opt(sbi->s_mount_opt, I_VERSION);
@@ -1374,9 +1373,9 @@ set_qf_format:
1374 set_opt(sbi->s_mount_opt, DELALLOC); 1373 set_opt(sbi->s_mount_opt, DELALLOC);
1375 break; 1374 break;
1376 default: 1375 default:
1377 printk (KERN_ERR 1376 printk(KERN_ERR
1378 "EXT4-fs: Unrecognized mount option \"%s\" " 1377 "EXT4-fs: Unrecognized mount option \"%s\" "
1379 "or missing value\n", p); 1378 "or missing value\n", p);
1380 return 0; 1379 return 0;
1381 } 1380 }
1382 } 1381 }
@@ -1423,31 +1422,31 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1423 int res = 0; 1422 int res = 0;
1424 1423
1425 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1424 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1426 printk (KERN_ERR "EXT4-fs warning: revision level too high, " 1425 printk(KERN_ERR "EXT4-fs warning: revision level too high, "
1427 "forcing read-only mode\n"); 1426 "forcing read-only mode\n");
1428 res = MS_RDONLY; 1427 res = MS_RDONLY;
1429 } 1428 }
1430 if (read_only) 1429 if (read_only)
1431 return res; 1430 return res;
1432 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1431 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1433 printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1432 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
1434 "running e2fsck is recommended\n"); 1433 "running e2fsck is recommended\n");
1435 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1434 else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1436 printk (KERN_WARNING 1435 printk(KERN_WARNING
1437 "EXT4-fs warning: mounting fs with errors, " 1436 "EXT4-fs warning: mounting fs with errors, "
1438 "running e2fsck is recommended\n"); 1437 "running e2fsck is recommended\n");
1439 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1438 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1440 le16_to_cpu(es->s_mnt_count) >= 1439 le16_to_cpu(es->s_mnt_count) >=
1441 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1440 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1442 printk (KERN_WARNING 1441 printk(KERN_WARNING
1443 "EXT4-fs warning: maximal mount count reached, " 1442 "EXT4-fs warning: maximal mount count reached, "
1444 "running e2fsck is recommended\n"); 1443 "running e2fsck is recommended\n");
1445 else if (le32_to_cpu(es->s_checkinterval) && 1444 else if (le32_to_cpu(es->s_checkinterval) &&
1446 (le32_to_cpu(es->s_lastcheck) + 1445 (le32_to_cpu(es->s_lastcheck) +
1447 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1446 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1448 printk (KERN_WARNING 1447 printk(KERN_WARNING
1449 "EXT4-fs warning: checktime reached, " 1448 "EXT4-fs warning: checktime reached, "
1450 "running e2fsck is recommended\n"); 1449 "running e2fsck is recommended\n");
1451#if 0 1450#if 0
1452 /* @@@ We _will_ want to clear the valid bit if we find 1451 /* @@@ We _will_ want to clear the valid bit if we find
1453 * inconsistencies, to force a fsck at reboot. But for 1452 * inconsistencies, to force a fsck at reboot. But for
@@ -1506,14 +1505,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
1506 1505
1507 flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / 1506 flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
1508 groups_per_flex; 1507 groups_per_flex;
1509 sbi->s_flex_groups = kmalloc(flex_group_count * 1508 sbi->s_flex_groups = kzalloc(flex_group_count *
1510 sizeof(struct flex_groups), GFP_KERNEL); 1509 sizeof(struct flex_groups), GFP_KERNEL);
1511 if (sbi->s_flex_groups == NULL) { 1510 if (sbi->s_flex_groups == NULL) {
1512 printk(KERN_ERR "EXT4-fs: not enough memory\n"); 1511 printk(KERN_ERR "EXT4-fs: not enough memory for "
1512 "%lu flex groups\n", flex_group_count);
1513 goto failed; 1513 goto failed;
1514 } 1514 }
1515 memset(sbi->s_flex_groups, 0, flex_group_count *
1516 sizeof(struct flex_groups));
1517 1515
1518 gdp = ext4_get_group_desc(sb, 1, &bh); 1516 gdp = ext4_get_group_desc(sb, 1, &bh);
1519 block_bitmap = ext4_block_bitmap(sb, gdp) - 1; 1517 block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
@@ -1597,16 +1595,14 @@ static int ext4_check_descriptors(struct super_block *sb)
1597 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1595 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1598 1596
1599 block_bitmap = ext4_block_bitmap(sb, gdp); 1597 block_bitmap = ext4_block_bitmap(sb, gdp);
1600 if (block_bitmap < first_block || block_bitmap > last_block) 1598 if (block_bitmap < first_block || block_bitmap > last_block) {
1601 {
1602 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1599 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1603 "Block bitmap for group %lu not in group " 1600 "Block bitmap for group %lu not in group "
1604 "(block %llu)!", i, block_bitmap); 1601 "(block %llu)!", i, block_bitmap);
1605 return 0; 1602 return 0;
1606 } 1603 }
1607 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1604 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1608 if (inode_bitmap < first_block || inode_bitmap > last_block) 1605 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1609 {
1610 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1606 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1611 "Inode bitmap for group %lu not in group " 1607 "Inode bitmap for group %lu not in group "
1612 "(block %llu)!", i, inode_bitmap); 1608 "(block %llu)!", i, inode_bitmap);
@@ -1614,26 +1610,28 @@ static int ext4_check_descriptors(struct super_block *sb)
1614 } 1610 }
1615 inode_table = ext4_inode_table(sb, gdp); 1611 inode_table = ext4_inode_table(sb, gdp);
1616 if (inode_table < first_block || 1612 if (inode_table < first_block ||
1617 inode_table + sbi->s_itb_per_group - 1 > last_block) 1613 inode_table + sbi->s_itb_per_group - 1 > last_block) {
1618 {
1619 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1614 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1620 "Inode table for group %lu not in group " 1615 "Inode table for group %lu not in group "
1621 "(block %llu)!", i, inode_table); 1616 "(block %llu)!", i, inode_table);
1622 return 0; 1617 return 0;
1623 } 1618 }
1619 spin_lock(sb_bgl_lock(sbi, i));
1624 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1620 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1625 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1621 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1626 "Checksum for group %lu failed (%u!=%u)\n", 1622 "Checksum for group %lu failed (%u!=%u)\n",
1627 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1623 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1628 gdp)), le16_to_cpu(gdp->bg_checksum)); 1624 gdp)), le16_to_cpu(gdp->bg_checksum));
1629 return 0; 1625 if (!(sb->s_flags & MS_RDONLY))
1626 return 0;
1630 } 1627 }
1628 spin_unlock(sb_bgl_lock(sbi, i));
1631 if (!flexbg_flag) 1629 if (!flexbg_flag)
1632 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1630 first_block += EXT4_BLOCKS_PER_GROUP(sb);
1633 } 1631 }
1634 1632
1635 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1633 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1636 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); 1634 sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
1637 return 1; 1635 return 1;
1638} 1636}
1639 1637
@@ -1654,8 +1652,8 @@ static int ext4_check_descriptors(struct super_block *sb)
1654 * e2fsck was run on this filesystem, and it must have already done the orphan 1652 * e2fsck was run on this filesystem, and it must have already done the orphan
1655 * inode cleanup for us, so we can safely abort without any further action. 1653 * inode cleanup for us, so we can safely abort without any further action.
1656 */ 1654 */
1657static void ext4_orphan_cleanup (struct super_block * sb, 1655static void ext4_orphan_cleanup(struct super_block *sb,
1658 struct ext4_super_block * es) 1656 struct ext4_super_block *es)
1659{ 1657{
1660 unsigned int s_flags = sb->s_flags; 1658 unsigned int s_flags = sb->s_flags;
1661 int nr_orphans = 0, nr_truncates = 0; 1659 int nr_orphans = 0, nr_truncates = 0;
@@ -1732,7 +1730,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1732 iput(inode); /* The delete magic happens here! */ 1730 iput(inode); /* The delete magic happens here! */
1733 } 1731 }
1734 1732
1735#define PLURAL(x) (x), ((x)==1) ? "" : "s" 1733#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1736 1734
1737 if (nr_orphans) 1735 if (nr_orphans)
1738 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1736 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
@@ -1899,12 +1897,12 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
1899 return 0; 1897 return 0;
1900} 1898}
1901 1899
1902static int ext4_fill_super (struct super_block *sb, void *data, int silent) 1900static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1903 __releases(kernel_lock) 1901 __releases(kernel_lock)
1904 __acquires(kernel_lock) 1902 __acquires(kernel_lock)
1905 1903
1906{ 1904{
1907 struct buffer_head * bh; 1905 struct buffer_head *bh;
1908 struct ext4_super_block *es = NULL; 1906 struct ext4_super_block *es = NULL;
1909 struct ext4_sb_info *sbi; 1907 struct ext4_sb_info *sbi;
1910 ext4_fsblk_t block; 1908 ext4_fsblk_t block;
@@ -1953,7 +1951,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1953 } 1951 }
1954 1952
1955 if (!(bh = sb_bread(sb, logical_sb_block))) { 1953 if (!(bh = sb_bread(sb, logical_sb_block))) {
1956 printk (KERN_ERR "EXT4-fs: unable to read superblock\n"); 1954 printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
1957 goto out_fail; 1955 goto out_fail;
1958 } 1956 }
1959 /* 1957 /*
@@ -2026,8 +2024,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2026 set_opt(sbi->s_mount_opt, DELALLOC); 2024 set_opt(sbi->s_mount_opt, DELALLOC);
2027 2025
2028 2026
2029 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 2027 if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum,
2030 NULL, 0)) 2028 NULL, 0))
2031 goto failed_mount; 2029 goto failed_mount;
2032 2030
2033 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2031 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -2102,7 +2100,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2102 goto failed_mount; 2100 goto failed_mount;
2103 } 2101 }
2104 2102
2105 brelse (bh); 2103 brelse(bh);
2106 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2104 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2107 offset = do_div(logical_sb_block, blocksize); 2105 offset = do_div(logical_sb_block, blocksize);
2108 bh = sb_bread(sb, logical_sb_block); 2106 bh = sb_bread(sb, logical_sb_block);
@@ -2114,8 +2112,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2114 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2112 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2115 sbi->s_es = es; 2113 sbi->s_es = es;
2116 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2114 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2117 printk (KERN_ERR 2115 printk(KERN_ERR
2118 "EXT4-fs: Magic mismatch, very weird !\n"); 2116 "EXT4-fs: Magic mismatch, very weird !\n");
2119 goto failed_mount; 2117 goto failed_mount;
2120 } 2118 }
2121 } 2119 }
@@ -2132,9 +2130,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2132 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2130 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2133 (!is_power_of_2(sbi->s_inode_size)) || 2131 (!is_power_of_2(sbi->s_inode_size)) ||
2134 (sbi->s_inode_size > blocksize)) { 2132 (sbi->s_inode_size > blocksize)) {
2135 printk (KERN_ERR 2133 printk(KERN_ERR
2136 "EXT4-fs: unsupported inode size: %d\n", 2134 "EXT4-fs: unsupported inode size: %d\n",
2137 sbi->s_inode_size); 2135 sbi->s_inode_size);
2138 goto failed_mount; 2136 goto failed_mount;
2139 } 2137 }
2140 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2138 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
@@ -2166,20 +2164,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2166 sbi->s_mount_state = le16_to_cpu(es->s_state); 2164 sbi->s_mount_state = le16_to_cpu(es->s_state);
2167 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2165 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2168 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2166 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2169 for (i=0; i < 4; i++) 2167 for (i = 0; i < 4; i++)
2170 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2168 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2171 sbi->s_def_hash_version = es->s_def_hash_version; 2169 sbi->s_def_hash_version = es->s_def_hash_version;
2172 2170
2173 if (sbi->s_blocks_per_group > blocksize * 8) { 2171 if (sbi->s_blocks_per_group > blocksize * 8) {
2174 printk (KERN_ERR 2172 printk(KERN_ERR
2175 "EXT4-fs: #blocks per group too big: %lu\n", 2173 "EXT4-fs: #blocks per group too big: %lu\n",
2176 sbi->s_blocks_per_group); 2174 sbi->s_blocks_per_group);
2177 goto failed_mount; 2175 goto failed_mount;
2178 } 2176 }
2179 if (sbi->s_inodes_per_group > blocksize * 8) { 2177 if (sbi->s_inodes_per_group > blocksize * 8) {
2180 printk (KERN_ERR 2178 printk(KERN_ERR
2181 "EXT4-fs: #inodes per group too big: %lu\n", 2179 "EXT4-fs: #inodes per group too big: %lu\n",
2182 sbi->s_inodes_per_group); 2180 sbi->s_inodes_per_group);
2183 goto failed_mount; 2181 goto failed_mount;
2184 } 2182 }
2185 2183
@@ -2213,10 +2211,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2213 sbi->s_groups_count = blocks_count; 2211 sbi->s_groups_count = blocks_count;
2214 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2212 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2215 EXT4_DESC_PER_BLOCK(sb); 2213 EXT4_DESC_PER_BLOCK(sb);
2216 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 2214 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2217 GFP_KERNEL); 2215 GFP_KERNEL);
2218 if (sbi->s_group_desc == NULL) { 2216 if (sbi->s_group_desc == NULL) {
2219 printk (KERN_ERR "EXT4-fs: not enough memory\n"); 2217 printk(KERN_ERR "EXT4-fs: not enough memory\n");
2220 goto failed_mount; 2218 goto failed_mount;
2221 } 2219 }
2222 2220
@@ -2226,13 +2224,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2226 block = descriptor_loc(sb, logical_sb_block, i); 2224 block = descriptor_loc(sb, logical_sb_block, i);
2227 sbi->s_group_desc[i] = sb_bread(sb, block); 2225 sbi->s_group_desc[i] = sb_bread(sb, block);
2228 if (!sbi->s_group_desc[i]) { 2226 if (!sbi->s_group_desc[i]) {
2229 printk (KERN_ERR "EXT4-fs: " 2227 printk(KERN_ERR "EXT4-fs: "
2230 "can't read group descriptor %d\n", i); 2228 "can't read group descriptor %d\n", i);
2231 db_count = i; 2229 db_count = i;
2232 goto failed_mount2; 2230 goto failed_mount2;
2233 } 2231 }
2234 } 2232 }
2235 if (!ext4_check_descriptors (sb)) { 2233 if (!ext4_check_descriptors(sb)) {
2236 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 2234 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
2237 goto failed_mount2; 2235 goto failed_mount2;
2238 } 2236 }
@@ -2308,11 +2306,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2308 EXT4_SB(sb)->s_journal->j_failed_commit) { 2306 EXT4_SB(sb)->s_journal->j_failed_commit) {
2309 printk(KERN_CRIT "EXT4-fs error (device %s): " 2307 printk(KERN_CRIT "EXT4-fs error (device %s): "
2310 "ext4_fill_super: Journal transaction " 2308 "ext4_fill_super: Journal transaction "
2311 "%u is corrupt\n", sb->s_id, 2309 "%u is corrupt\n", sb->s_id,
2312 EXT4_SB(sb)->s_journal->j_failed_commit); 2310 EXT4_SB(sb)->s_journal->j_failed_commit);
2313 if (test_opt (sb, ERRORS_RO)) { 2311 if (test_opt(sb, ERRORS_RO)) {
2314 printk (KERN_CRIT 2312 printk(KERN_CRIT
2315 "Mounting filesystem read-only\n"); 2313 "Mounting filesystem read-only\n");
2316 sb->s_flags |= MS_RDONLY; 2314 sb->s_flags |= MS_RDONLY;
2317 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2315 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2318 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2316 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2332,9 +2330,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2332 goto failed_mount3; 2330 goto failed_mount3;
2333 } else { 2331 } else {
2334 if (!silent) 2332 if (!silent)
2335 printk (KERN_ERR 2333 printk(KERN_ERR
2336 "ext4: No journal on filesystem on %s\n", 2334 "ext4: No journal on filesystem on %s\n",
2337 sb->s_id); 2335 sb->s_id);
2338 goto failed_mount3; 2336 goto failed_mount3;
2339 } 2337 }
2340 2338
@@ -2418,7 +2416,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2418 goto failed_mount4; 2416 goto failed_mount4;
2419 } 2417 }
2420 2418
2421 ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2419 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2422 2420
2423 /* determine the minimum size of new large inodes, if present */ 2421 /* determine the minimum size of new large inodes, if present */
2424 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2422 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
@@ -2457,12 +2455,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2457 ext4_orphan_cleanup(sb, es); 2455 ext4_orphan_cleanup(sb, es);
2458 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2456 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2459 if (needs_recovery) 2457 if (needs_recovery)
2460 printk (KERN_INFO "EXT4-fs: recovery complete.\n"); 2458 printk(KERN_INFO "EXT4-fs: recovery complete.\n");
2461 ext4_mark_recovery_complete(sb, es); 2459 ext4_mark_recovery_complete(sb, es);
2462 printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", 2460 printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
2463 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": 2461 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
2464 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": 2462 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
2465 "writeback"); 2463 "writeback");
2466 2464
2467 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2465 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2468 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " 2466 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
@@ -2575,14 +2573,14 @@ static journal_t *ext4_get_journal(struct super_block *sb,
2575static journal_t *ext4_get_dev_journal(struct super_block *sb, 2573static journal_t *ext4_get_dev_journal(struct super_block *sb,
2576 dev_t j_dev) 2574 dev_t j_dev)
2577{ 2575{
2578 struct buffer_head * bh; 2576 struct buffer_head *bh;
2579 journal_t *journal; 2577 journal_t *journal;
2580 ext4_fsblk_t start; 2578 ext4_fsblk_t start;
2581 ext4_fsblk_t len; 2579 ext4_fsblk_t len;
2582 int hblock, blocksize; 2580 int hblock, blocksize;
2583 ext4_fsblk_t sb_block; 2581 ext4_fsblk_t sb_block;
2584 unsigned long offset; 2582 unsigned long offset;
2585 struct ext4_super_block * es; 2583 struct ext4_super_block *es;
2586 struct block_device *bdev; 2584 struct block_device *bdev;
2587 2585
2588 bdev = ext4_blkdev_get(j_dev); 2586 bdev = ext4_blkdev_get(j_dev);
@@ -2697,8 +2695,8 @@ static int ext4_load_journal(struct super_block *sb,
2697 "unavailable, cannot proceed.\n"); 2695 "unavailable, cannot proceed.\n");
2698 return -EROFS; 2696 return -EROFS;
2699 } 2697 }
2700 printk (KERN_INFO "EXT4-fs: write access will " 2698 printk(KERN_INFO "EXT4-fs: write access will "
2701 "be enabled during recovery.\n"); 2699 "be enabled during recovery.\n");
2702 } 2700 }
2703 } 2701 }
2704 2702
@@ -2751,8 +2749,8 @@ static int ext4_load_journal(struct super_block *sb,
2751 return 0; 2749 return 0;
2752} 2750}
2753 2751
2754static int ext4_create_journal(struct super_block * sb, 2752static int ext4_create_journal(struct super_block *sb,
2755 struct ext4_super_block * es, 2753 struct ext4_super_block *es,
2756 unsigned int journal_inum) 2754 unsigned int journal_inum)
2757{ 2755{
2758 journal_t *journal; 2756 journal_t *journal;
@@ -2793,9 +2791,8 @@ static int ext4_create_journal(struct super_block * sb,
2793 return 0; 2791 return 0;
2794} 2792}
2795 2793
2796static void ext4_commit_super (struct super_block * sb, 2794static void ext4_commit_super(struct super_block *sb,
2797 struct ext4_super_block * es, 2795 struct ext4_super_block *es, int sync)
2798 int sync)
2799{ 2796{
2800 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 2797 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
2801 2798
@@ -2816,8 +2813,8 @@ static void ext4_commit_super (struct super_block * sb,
2816 * remounting) the filesystem readonly, then we will end up with a 2813 * remounting) the filesystem readonly, then we will end up with a
2817 * consistent fs on disk. Record that fact. 2814 * consistent fs on disk. Record that fact.
2818 */ 2815 */
2819static void ext4_mark_recovery_complete(struct super_block * sb, 2816static void ext4_mark_recovery_complete(struct super_block *sb,
2820 struct ext4_super_block * es) 2817 struct ext4_super_block *es)
2821{ 2818{
2822 journal_t *journal = EXT4_SB(sb)->s_journal; 2819 journal_t *journal = EXT4_SB(sb)->s_journal;
2823 2820
@@ -2839,8 +2836,8 @@ static void ext4_mark_recovery_complete(struct super_block * sb,
2839 * has recorded an error from a previous lifetime, move that error to the 2836 * has recorded an error from a previous lifetime, move that error to the
2840 * main filesystem now. 2837 * main filesystem now.
2841 */ 2838 */
2842static void ext4_clear_journal_err(struct super_block * sb, 2839static void ext4_clear_journal_err(struct super_block *sb,
2843 struct ext4_super_block * es) 2840 struct ext4_super_block *es)
2844{ 2841{
2845 journal_t *journal; 2842 journal_t *journal;
2846 int j_errno; 2843 int j_errno;
@@ -2865,7 +2862,7 @@ static void ext4_clear_journal_err(struct super_block * sb,
2865 2862
2866 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2863 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2867 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2864 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2868 ext4_commit_super (sb, es, 1); 2865 ext4_commit_super(sb, es, 1);
2869 2866
2870 jbd2_journal_clear_err(journal); 2867 jbd2_journal_clear_err(journal);
2871 } 2868 }
@@ -2898,7 +2895,7 @@ int ext4_force_commit(struct super_block *sb)
2898 * This implicitly triggers the writebehind on sync(). 2895 * This implicitly triggers the writebehind on sync().
2899 */ 2896 */
2900 2897
2901static void ext4_write_super (struct super_block * sb) 2898static void ext4_write_super(struct super_block *sb)
2902{ 2899{
2903 if (mutex_trylock(&sb->s_lock) != 0) 2900 if (mutex_trylock(&sb->s_lock) != 0)
2904 BUG(); 2901 BUG();
@@ -2954,13 +2951,14 @@ static void ext4_unlockfs(struct super_block *sb)
2954 } 2951 }
2955} 2952}
2956 2953
2957static int ext4_remount (struct super_block * sb, int * flags, char * data) 2954static int ext4_remount(struct super_block *sb, int *flags, char *data)
2958{ 2955{
2959 struct ext4_super_block * es; 2956 struct ext4_super_block *es;
2960 struct ext4_sb_info *sbi = EXT4_SB(sb); 2957 struct ext4_sb_info *sbi = EXT4_SB(sb);
2961 ext4_fsblk_t n_blocks_count = 0; 2958 ext4_fsblk_t n_blocks_count = 0;
2962 unsigned long old_sb_flags; 2959 unsigned long old_sb_flags;
2963 struct ext4_mount_options old_opts; 2960 struct ext4_mount_options old_opts;
2961 ext4_group_t g;
2964 int err; 2962 int err;
2965#ifdef CONFIG_QUOTA 2963#ifdef CONFIG_QUOTA
2966 int i; 2964 int i;
@@ -3039,6 +3037,26 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
3039 } 3037 }
3040 3038
3041 /* 3039 /*
3040 * Make sure the group descriptor checksums
3041 * are sane. If they aren't, refuse to
3042 * remount r/w.
3043 */
3044 for (g = 0; g < sbi->s_groups_count; g++) {
3045 struct ext4_group_desc *gdp =
3046 ext4_get_group_desc(sb, g, NULL);
3047
3048 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3049 printk(KERN_ERR
3050 "EXT4-fs: ext4_remount: "
3051 "Checksum for group %lu failed (%u!=%u)\n",
3052 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3053 le16_to_cpu(gdp->bg_checksum));
3054 err = -EINVAL;
3055 goto restore_opts;
3056 }
3057 }
3058
3059 /*
3042 * If we have an unprocessed orphan list hanging 3060 * If we have an unprocessed orphan list hanging
3043 * around from a previously readonly bdev mount, 3061 * around from a previously readonly bdev mount,
3044 * require a full umount/remount for now. 3062 * require a full umount/remount for now.
@@ -3063,7 +3081,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
3063 sbi->s_mount_state = le16_to_cpu(es->s_state); 3081 sbi->s_mount_state = le16_to_cpu(es->s_state);
3064 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3082 if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3065 goto restore_opts; 3083 goto restore_opts;
3066 if (!ext4_setup_super (sb, es, 0)) 3084 if (!ext4_setup_super(sb, es, 0))
3067 sb->s_flags &= ~MS_RDONLY; 3085 sb->s_flags &= ~MS_RDONLY;
3068 } 3086 }
3069 } 3087 }
@@ -3093,7 +3111,7 @@ restore_opts:
3093 return err; 3111 return err;
3094} 3112}
3095 3113
3096static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) 3114static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3097{ 3115{
3098 struct super_block *sb = dentry->d_sb; 3116 struct super_block *sb = dentry->d_sb;
3099 struct ext4_sb_info *sbi = EXT4_SB(sb); 3117 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3331,12 +3349,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3331 } 3349 }
3332 /* Journaling quota? */ 3350 /* Journaling quota? */
3333 if (EXT4_SB(sb)->s_qf_names[type]) { 3351 if (EXT4_SB(sb)->s_qf_names[type]) {
3334 /* Quotafile not of fs root? */ 3352 /* Quotafile not in fs root? */
3335 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 3353 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
3336 printk(KERN_WARNING 3354 printk(KERN_WARNING
3337 "EXT4-fs: Quota file not on filesystem root. " 3355 "EXT4-fs: Quota file not on filesystem root. "
3338 "Journaled quota will not work.\n"); 3356 "Journaled quota will not work.\n");
3339 } 3357 }
3340 3358
3341 /* 3359 /*
3342 * When we journal data on quota file, we have to flush journal to see 3360 * When we journal data on quota file, we have to flush journal to see
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 93c5fdcdad2e..8954208b4893 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
1512 char *name = entry->e_name; 1512 char *name = entry->e_name;
1513 int n; 1513 int n;
1514 1514
1515 for (n=0; n < entry->e_name_len; n++) { 1515 for (n = 0; n < entry->e_name_len; n++) {
1516 hash = (hash << NAME_HASH_SHIFT) ^ 1516 hash = (hash << NAME_HASH_SHIFT) ^
1517 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ 1517 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1518 *name++; 1518 *name++;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f8b3be873226..adf0395f318e 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
262 jinode->i_flags |= JI_COMMIT_RUNNING; 262 jinode->i_flags |= JI_COMMIT_RUNNING;
263 spin_unlock(&journal->j_list_lock); 263 spin_unlock(&journal->j_list_lock);
264 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); 264 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
265 if (!ret) 265 if (err) {
266 ret = err; 266 /*
267 * Because AS_EIO is cleared by
268 * wait_on_page_writeback_range(), set it again so
269 * that user process can get -EIO from fsync().
270 */
271 set_bit(AS_EIO,
272 &jinode->i_vfs_inode->i_mapping->flags);
273
274 if (!ret)
275 ret = err;
276 }
267 spin_lock(&journal->j_list_lock); 277 spin_lock(&journal->j_list_lock);
268 jinode->i_flags &= ~JI_COMMIT_RUNNING; 278 jinode->i_flags &= ~JI_COMMIT_RUNNING;
269 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 279 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -670,8 +680,14 @@ start_journal_io:
670 * commit block, which happens below in such setting. 680 * commit block, which happens below in such setting.
671 */ 681 */
672 err = journal_finish_inode_data_buffers(journal, commit_transaction); 682 err = journal_finish_inode_data_buffers(journal, commit_transaction);
673 if (err) 683 if (err) {
674 jbd2_journal_abort(journal, err); 684 char b[BDEVNAME_SIZE];
685
686 printk(KERN_WARNING
687 "JBD2: Detected IO errors while flushing file data "
688 "on %s\n", bdevname(journal->j_fs_dev, b));
689 err = 0;
690 }
675 691
676 /* Lo and behold: we have just managed to send a transaction to 692 /* Lo and behold: we have just managed to send a transaction to
677 the log. Before we can commit it, wait for the IO so far to 693 the log. Before we can commit it, wait for the IO so far to
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b26c6d9fe6ae..8207a01c4edb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features);
68EXPORT_SYMBOL(jbd2_journal_create); 68EXPORT_SYMBOL(jbd2_journal_create);
69EXPORT_SYMBOL(jbd2_journal_load); 69EXPORT_SYMBOL(jbd2_journal_load);
70EXPORT_SYMBOL(jbd2_journal_destroy); 70EXPORT_SYMBOL(jbd2_journal_destroy);
71EXPORT_SYMBOL(jbd2_journal_update_superblock);
72EXPORT_SYMBOL(jbd2_journal_abort); 71EXPORT_SYMBOL(jbd2_journal_abort);
73EXPORT_SYMBOL(jbd2_journal_errno); 72EXPORT_SYMBOL(jbd2_journal_errno);
74EXPORT_SYMBOL(jbd2_journal_ack_err); 73EXPORT_SYMBOL(jbd2_journal_ack_err);