aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/balloc.c295
-rw-r--r--fs/ext4/bitmap.c5
-rw-r--r--fs/ext4/dir.c10
-rw-r--r--fs/ext4/ext4.h158
-rw-r--r--fs/ext4/ext4_extents.h5
-rw-r--r--fs/ext4/ext4_i.h16
-rw-r--r--fs/ext4/ext4_jbd2.c83
-rw-r--r--fs/ext4/ext4_jbd2.h87
-rw-r--r--fs/ext4/ext4_sb.h12
-rw-r--r--fs/ext4/extents.c62
-rw-r--r--fs/ext4/file.c3
-rw-r--r--fs/ext4/hash.c77
-rw-r--r--fs/ext4/ialloc.c334
-rw-r--r--fs/ext4/inode.c322
-rw-r--r--fs/ext4/ioctl.c2
-rw-r--r--fs/ext4/mballoc.c629
-rw-r--r--fs/ext4/mballoc.h71
-rw-r--r--fs/ext4/migrate.c19
-rw-r--r--fs/ext4/namei.c113
-rw-r--r--fs/ext4/resize.c113
-rw-r--r--fs/ext4/super.c686
-rw-r--r--fs/ext4/xattr.c25
22 files changed, 1941 insertions, 1186 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index db35cfdb3c8b..6bba06b09dd1 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -20,6 +20,7 @@
20#include "ext4.h" 20#include "ext4.h"
21#include "ext4_jbd2.h" 21#include "ext4_jbd2.h"
22#include "group.h" 22#include "group.h"
23#include "mballoc.h"
23 24
24/* 25/*
25 * balloc.c contains the blocks allocation and deallocation routines 26 * balloc.c contains the blocks allocation and deallocation routines
@@ -100,10 +101,10 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
100 * essentially implementing a per-group read-only flag. */ 101 * essentially implementing a per-group read-only flag. */
101 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 102 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
102 ext4_error(sb, __func__, 103 ext4_error(sb, __func__,
103 "Checksum bad for group %lu\n", block_group); 104 "Checksum bad for group %u", block_group);
104 gdp->bg_free_blocks_count = 0; 105 ext4_free_blks_set(sb, gdp, 0);
105 gdp->bg_free_inodes_count = 0; 106 ext4_free_inodes_set(sb, gdp, 0);
106 gdp->bg_itable_unused = 0; 107 ext4_itable_unused_set(sb, gdp, 0);
107 memset(bh->b_data, 0xff, sb->s_blocksize); 108 memset(bh->b_data, 0xff, sb->s_blocksize);
108 return 0; 109 return 0;
109 } 110 }
@@ -205,15 +206,15 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
205 ext4_group_t block_group, 206 ext4_group_t block_group,
206 struct buffer_head **bh) 207 struct buffer_head **bh)
207{ 208{
208 unsigned long group_desc; 209 unsigned int group_desc;
209 unsigned long offset; 210 unsigned int offset;
210 struct ext4_group_desc *desc; 211 struct ext4_group_desc *desc;
211 struct ext4_sb_info *sbi = EXT4_SB(sb); 212 struct ext4_sb_info *sbi = EXT4_SB(sb);
212 213
213 if (block_group >= sbi->s_groups_count) { 214 if (block_group >= sbi->s_groups_count) {
214 ext4_error(sb, "ext4_get_group_desc", 215 ext4_error(sb, "ext4_get_group_desc",
215 "block_group >= groups_count - " 216 "block_group >= groups_count - "
216 "block_group = %lu, groups_count = %lu", 217 "block_group = %u, groups_count = %u",
217 block_group, sbi->s_groups_count); 218 block_group, sbi->s_groups_count);
218 219
219 return NULL; 220 return NULL;
@@ -225,7 +226,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
225 if (!sbi->s_group_desc[group_desc]) { 226 if (!sbi->s_group_desc[group_desc]) {
226 ext4_error(sb, "ext4_get_group_desc", 227 ext4_error(sb, "ext4_get_group_desc",
227 "Group descriptor not loaded - " 228 "Group descriptor not loaded - "
228 "block_group = %lu, group_desc = %lu, desc = %lu", 229 "block_group = %u, group_desc = %u, desc = %u",
229 block_group, group_desc, offset); 230 block_group, group_desc, offset);
230 return NULL; 231 return NULL;
231 } 232 }
@@ -315,29 +316,50 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
315 if (unlikely(!bh)) { 316 if (unlikely(!bh)) {
316 ext4_error(sb, __func__, 317 ext4_error(sb, __func__,
317 "Cannot read block bitmap - " 318 "Cannot read block bitmap - "
318 "block_group = %lu, block_bitmap = %llu", 319 "block_group = %u, block_bitmap = %llu",
319 block_group, bitmap_blk); 320 block_group, bitmap_blk);
320 return NULL; 321 return NULL;
321 } 322 }
322 if (buffer_uptodate(bh) && 323
323 !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) 324 if (bitmap_uptodate(bh))
324 return bh; 325 return bh;
325 326
326 lock_buffer(bh); 327 lock_buffer(bh);
328 if (bitmap_uptodate(bh)) {
329 unlock_buffer(bh);
330 return bh;
331 }
327 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 332 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
328 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 333 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
329 ext4_init_block_bitmap(sb, bh, block_group, desc); 334 ext4_init_block_bitmap(sb, bh, block_group, desc);
335 set_bitmap_uptodate(bh);
330 set_buffer_uptodate(bh); 336 set_buffer_uptodate(bh);
331 unlock_buffer(bh);
332 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 337 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
338 unlock_buffer(bh);
333 return bh; 339 return bh;
334 } 340 }
335 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 341 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
342 if (buffer_uptodate(bh)) {
343 /*
344 * if not uninit if bh is uptodate,
345 * bitmap is also uptodate
346 */
347 set_bitmap_uptodate(bh);
348 unlock_buffer(bh);
349 return bh;
350 }
351 /*
352 * submit the buffer_head for read. We can
353 * safely mark the bitmap as uptodate now.
354 * We do it here so the bitmap uptodate bit
355 * get set with buffer lock held.
356 */
357 set_bitmap_uptodate(bh);
336 if (bh_submit_read(bh) < 0) { 358 if (bh_submit_read(bh) < 0) {
337 put_bh(bh); 359 put_bh(bh);
338 ext4_error(sb, __func__, 360 ext4_error(sb, __func__,
339 "Cannot read block bitmap - " 361 "Cannot read block bitmap - "
340 "block_group = %lu, block_bitmap = %llu", 362 "block_group = %u, block_bitmap = %llu",
341 block_group, bitmap_blk); 363 block_group, bitmap_blk);
342 return NULL; 364 return NULL;
343 } 365 }
@@ -350,62 +372,44 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
350} 372}
351 373
352/** 374/**
353 * ext4_free_blocks_sb() -- Free given blocks and update quota 375 * ext4_add_groupblocks() -- Add given blocks to an existing group
354 * @handle: handle to this transaction 376 * @handle: handle to this transaction
355 * @sb: super block 377 * @sb: super block
356 * @block: start physcial block to free 378 * @block: start physcial block to add to the block group
357 * @count: number of blocks to free 379 * @count: number of blocks to free
358 * @pdquot_freed_blocks: pointer to quota
359 * 380 *
360 * XXX This function is only used by the on-line resizing code, which 381 * This marks the blocks as free in the bitmap. We ask the
361 * should probably be fixed up to call the mballoc variant. There 382 * mballoc to reload the buddy after this by setting group
362 * this needs to be cleaned up later; in fact, I'm not convinced this 383 * EXT4_GROUP_INFO_NEED_INIT_BIT flag
363 * is 100% correct in the face of the mballoc code. The online resizing
364 * code needs to be fixed up to more tightly (and correctly) interlock
365 * with the mballoc code.
366 */ 384 */
367void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, 385void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
368 ext4_fsblk_t block, unsigned long count, 386 ext4_fsblk_t block, unsigned long count)
369 unsigned long *pdquot_freed_blocks)
370{ 387{
371 struct buffer_head *bitmap_bh = NULL; 388 struct buffer_head *bitmap_bh = NULL;
372 struct buffer_head *gd_bh; 389 struct buffer_head *gd_bh;
373 ext4_group_t block_group; 390 ext4_group_t block_group;
374 ext4_grpblk_t bit; 391 ext4_grpblk_t bit;
375 unsigned long i; 392 unsigned int i;
376 unsigned long overflow;
377 struct ext4_group_desc *desc; 393 struct ext4_group_desc *desc;
378 struct ext4_super_block *es; 394 struct ext4_super_block *es;
379 struct ext4_sb_info *sbi; 395 struct ext4_sb_info *sbi;
380 int err = 0, ret; 396 int err = 0, ret, blk_free_count;
381 ext4_grpblk_t group_freed; 397 ext4_grpblk_t blocks_freed;
398 struct ext4_group_info *grp;
382 399
383 *pdquot_freed_blocks = 0;
384 sbi = EXT4_SB(sb); 400 sbi = EXT4_SB(sb);
385 es = sbi->s_es; 401 es = sbi->s_es;
386 if (block < le32_to_cpu(es->s_first_data_block) || 402 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
387 block + count < block ||
388 block + count > ext4_blocks_count(es)) {
389 ext4_error(sb, "ext4_free_blocks",
390 "Freeing blocks not in datazone - "
391 "block = %llu, count = %lu", block, count);
392 goto error_return;
393 }
394
395 ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1);
396 403
397do_more:
398 overflow = 0;
399 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 404 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
405 grp = ext4_get_group_info(sb, block_group);
400 /* 406 /*
401 * Check to see if we are freeing blocks across a group 407 * Check to see if we are freeing blocks across a group
402 * boundary. 408 * boundary.
403 */ 409 */
404 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { 410 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
405 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); 411 goto error_return;
406 count -= overflow;
407 } 412 }
408 brelse(bitmap_bh);
409 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 413 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
410 if (!bitmap_bh) 414 if (!bitmap_bh)
411 goto error_return; 415 goto error_return;
@@ -418,18 +422,17 @@ do_more:
418 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || 422 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
419 in_range(block + count - 1, ext4_inode_table(sb, desc), 423 in_range(block + count - 1, ext4_inode_table(sb, desc),
420 sbi->s_itb_per_group)) { 424 sbi->s_itb_per_group)) {
421 ext4_error(sb, "ext4_free_blocks", 425 ext4_error(sb, __func__,
422 "Freeing blocks in system zones - " 426 "Adding blocks in system zones - "
423 "Block = %llu, count = %lu", 427 "Block = %llu, count = %lu",
424 block, count); 428 block, count);
425 goto error_return; 429 goto error_return;
426 } 430 }
427 431
428 /* 432 /*
429 * We are about to start releasing blocks in the bitmap, 433 * We are about to add blocks to the bitmap,
430 * so we need undo access. 434 * so we need undo access.
431 */ 435 */
432 /* @@@ check errors */
433 BUFFER_TRACE(bitmap_bh, "getting undo access"); 436 BUFFER_TRACE(bitmap_bh, "getting undo access");
434 err = ext4_journal_get_undo_access(handle, bitmap_bh); 437 err = ext4_journal_get_undo_access(handle, bitmap_bh);
435 if (err) 438 if (err)
@@ -444,107 +447,55 @@ do_more:
444 err = ext4_journal_get_write_access(handle, gd_bh); 447 err = ext4_journal_get_write_access(handle, gd_bh);
445 if (err) 448 if (err)
446 goto error_return; 449 goto error_return;
447 450 /*
448 jbd_lock_bh_state(bitmap_bh); 451 * make sure we don't allow a parallel init on other groups in the
449 452 * same buddy cache
450 for (i = 0, group_freed = 0; i < count; i++) { 453 */
451 /* 454 down_write(&grp->alloc_sem);
452 * An HJ special. This is expensive... 455 for (i = 0, blocks_freed = 0; i < count; i++) {
453 */
454#ifdef CONFIG_JBD2_DEBUG
455 jbd_unlock_bh_state(bitmap_bh);
456 {
457 struct buffer_head *debug_bh;
458 debug_bh = sb_find_get_block(sb, block + i);
459 if (debug_bh) {
460 BUFFER_TRACE(debug_bh, "Deleted!");
461 if (!bh2jh(bitmap_bh)->b_committed_data)
462 BUFFER_TRACE(debug_bh,
463 "No commited data in bitmap");
464 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
465 __brelse(debug_bh);
466 }
467 }
468 jbd_lock_bh_state(bitmap_bh);
469#endif
470 if (need_resched()) {
471 jbd_unlock_bh_state(bitmap_bh);
472 cond_resched();
473 jbd_lock_bh_state(bitmap_bh);
474 }
475 /* @@@ This prevents newly-allocated data from being
476 * freed and then reallocated within the same
477 * transaction.
478 *
479 * Ideally we would want to allow that to happen, but to
480 * do so requires making jbd2_journal_forget() capable of
481 * revoking the queued write of a data block, which
482 * implies blocking on the journal lock. *forget()
483 * cannot block due to truncate races.
484 *
485 * Eventually we can fix this by making jbd2_journal_forget()
486 * return a status indicating whether or not it was able
487 * to revoke the buffer. On successful revoke, it is
488 * safe not to set the allocation bit in the committed
489 * bitmap, because we know that there is no outstanding
490 * activity on the buffer any more and so it is safe to
491 * reallocate it.
492 */
493 BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
494 J_ASSERT_BH(bitmap_bh,
495 bh2jh(bitmap_bh)->b_committed_data != NULL);
496 ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
497 bh2jh(bitmap_bh)->b_committed_data);
498
499 /*
500 * We clear the bit in the bitmap after setting the committed
501 * data bit, because this is the reverse order to that which
502 * the allocator uses.
503 */
504 BUFFER_TRACE(bitmap_bh, "clear bit"); 456 BUFFER_TRACE(bitmap_bh, "clear bit");
505 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 457 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
506 bit + i, bitmap_bh->b_data)) { 458 bit + i, bitmap_bh->b_data)) {
507 jbd_unlock_bh_state(bitmap_bh);
508 ext4_error(sb, __func__, 459 ext4_error(sb, __func__,
509 "bit already cleared for block %llu", 460 "bit already cleared for block %llu",
510 (ext4_fsblk_t)(block + i)); 461 (ext4_fsblk_t)(block + i));
511 jbd_lock_bh_state(bitmap_bh);
512 BUFFER_TRACE(bitmap_bh, "bit already cleared"); 462 BUFFER_TRACE(bitmap_bh, "bit already cleared");
513 } else { 463 } else {
514 group_freed++; 464 blocks_freed++;
515 } 465 }
516 } 466 }
517 jbd_unlock_bh_state(bitmap_bh);
518
519 spin_lock(sb_bgl_lock(sbi, block_group)); 467 spin_lock(sb_bgl_lock(sbi, block_group));
520 le16_add_cpu(&desc->bg_free_blocks_count, group_freed); 468 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
469 ext4_free_blks_set(sb, desc, blk_free_count);
521 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 470 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
522 spin_unlock(sb_bgl_lock(sbi, block_group)); 471 spin_unlock(sb_bgl_lock(sbi, block_group));
523 percpu_counter_add(&sbi->s_freeblocks_counter, count); 472 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
524 473
525 if (sbi->s_log_groups_per_flex) { 474 if (sbi->s_log_groups_per_flex) {
526 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 475 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
527 spin_lock(sb_bgl_lock(sbi, flex_group)); 476 spin_lock(sb_bgl_lock(sbi, flex_group));
528 sbi->s_flex_groups[flex_group].free_blocks += count; 477 sbi->s_flex_groups[flex_group].free_blocks += blocks_freed;
529 spin_unlock(sb_bgl_lock(sbi, flex_group)); 478 spin_unlock(sb_bgl_lock(sbi, flex_group));
530 } 479 }
480 /*
481 * request to reload the buddy with the
482 * new bitmap information
483 */
484 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
485 ext4_mb_update_group_info(grp, blocks_freed);
486 up_write(&grp->alloc_sem);
531 487
532 /* We dirtied the bitmap block */ 488 /* We dirtied the bitmap block */
533 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 489 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
534 err = ext4_journal_dirty_metadata(handle, bitmap_bh); 490 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
535 491
536 /* And the group descriptor block */ 492 /* And the group descriptor block */
537 BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); 493 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
538 ret = ext4_journal_dirty_metadata(handle, gd_bh); 494 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
539 if (!err) err = ret; 495 if (!err)
540 *pdquot_freed_blocks += group_freed; 496 err = ret;
541
542 if (overflow && !err) {
543 block += count;
544 count = overflow;
545 goto do_more;
546 }
547 sb->s_dirt = 1; 497 sb->s_dirt = 1;
498
548error_return: 499error_return:
549 brelse(bitmap_bh); 500 brelse(bitmap_bh);
550 ext4_std_error(sb, err); 501 ext4_std_error(sb, err);
@@ -614,7 +565,7 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
614 if (dirty_blocks < 0) { 565 if (dirty_blocks < 0) {
615 printk(KERN_CRIT "Dirty block accounting " 566 printk(KERN_CRIT "Dirty block accounting "
616 "went wrong %lld\n", 567 "went wrong %lld\n",
617 dirty_blocks); 568 (long long)dirty_blocks);
618 } 569 }
619 } 570 }
620 /* Check whether we have space after 571 /* Check whether we have space after
@@ -624,7 +575,7 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
624 return 1; 575 return 1;
625 576
626 /* Hm, nope. Are (enough) root reserved blocks available? */ 577 /* Hm, nope. Are (enough) root reserved blocks available? */
627 if (sbi->s_resuid == current->fsuid || 578 if (sbi->s_resuid == current_fsuid() ||
628 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || 579 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
629 capable(CAP_SYS_RESOURCE)) { 580 capable(CAP_SYS_RESOURCE)) {
630 if (free_blocks >= (nblocks + dirty_blocks)) 581 if (free_blocks >= (nblocks + dirty_blocks))
@@ -666,101 +617,45 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
666 return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); 617 return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
667} 618}
668 619
669#define EXT4_META_BLOCK 0x1
670
671static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
672 ext4_lblk_t iblock, ext4_fsblk_t goal,
673 unsigned long *count, int *errp, int flags)
674{
675 struct ext4_allocation_request ar;
676 ext4_fsblk_t ret;
677
678 memset(&ar, 0, sizeof(ar));
679 /* Fill with neighbour allocated blocks */
680
681 ar.inode = inode;
682 ar.goal = goal;
683 ar.len = *count;
684 ar.logical = iblock;
685
686 if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK))
687 /* enable in-core preallocation for data block allocation */
688 ar.flags = EXT4_MB_HINT_DATA;
689 else
690 /* disable in-core preallocation for non-regular files */
691 ar.flags = 0;
692
693 ret = ext4_mb_new_blocks(handle, &ar, errp);
694 *count = ar.len;
695 return ret;
696}
697
698/* 620/*
699 * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks 621 * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
700 * 622 *
701 * @handle: handle to this transaction 623 * @handle: handle to this transaction
702 * @inode: file inode 624 * @inode: file inode
703 * @goal: given target block(filesystem wide) 625 * @goal: given target block(filesystem wide)
704 * @count: total number of blocks need 626 * @count: pointer to total number of blocks needed
705 * @errp: error code 627 * @errp: error code
706 * 628 *
707 * Return 1st allocated block numberon success, *count stores total account 629 * Return 1st allocated block number on success, *count stores total account
708 * error stores in errp pointer 630 * error stores in errp pointer
709 */ 631 */
710ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, 632ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
711 ext4_fsblk_t goal, unsigned long *count, int *errp) 633 ext4_fsblk_t goal, unsigned long *count, int *errp)
712{ 634{
635 struct ext4_allocation_request ar;
713 ext4_fsblk_t ret; 636 ext4_fsblk_t ret;
714 ret = do_blk_alloc(handle, inode, 0, goal, 637
715 count, errp, EXT4_META_BLOCK); 638 memset(&ar, 0, sizeof(ar));
639 /* Fill with neighbour allocated blocks */
640 ar.inode = inode;
641 ar.goal = goal;
642 ar.len = count ? *count : 1;
643
644 ret = ext4_mb_new_blocks(handle, &ar, errp);
645 if (count)
646 *count = ar.len;
647
716 /* 648 /*
717 * Account for the allocated meta blocks 649 * Account for the allocated meta blocks
718 */ 650 */
719 if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { 651 if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
720 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 652 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
721 EXT4_I(inode)->i_allocated_meta_blocks += *count; 653 EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
722 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 654 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
723 } 655 }
724 return ret; 656 return ret;
725} 657}
726 658
727/*
728 * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
729 *
730 * @handle: handle to this transaction
731 * @inode: file inode
732 * @goal: given target block(filesystem wide)
733 * @errp: error code
734 *
735 * Return allocated block number on success
736 */
737ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
738 ext4_fsblk_t goal, int *errp)
739{
740 unsigned long count = 1;
741 return ext4_new_meta_blocks(handle, inode, goal, &count, errp);
742}
743
744/*
745 * ext4_new_blocks() -- allocate data blocks
746 *
747 * @handle: handle to this transaction
748 * @inode: file inode
749 * @goal: given target block(filesystem wide)
750 * @count: total number of blocks need
751 * @errp: error code
752 *
753 * Return 1st allocated block numberon success, *count stores total account
754 * error stores in errp pointer
755 */
756
757ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
758 ext4_lblk_t iblock, ext4_fsblk_t goal,
759 unsigned long *count, int *errp)
760{
761 return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0);
762}
763
764/** 659/**
765 * ext4_count_free_blocks() -- count filesystem free blocks 660 * ext4_count_free_blocks() -- count filesystem free blocks
766 * @sb: superblock 661 * @sb: superblock
@@ -776,7 +671,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
776#ifdef EXT4FS_DEBUG 671#ifdef EXT4FS_DEBUG
777 struct ext4_super_block *es; 672 struct ext4_super_block *es;
778 ext4_fsblk_t bitmap_count; 673 ext4_fsblk_t bitmap_count;
779 unsigned long x; 674 unsigned int x;
780 struct buffer_head *bitmap_bh = NULL; 675 struct buffer_head *bitmap_bh = NULL;
781 676
782 es = EXT4_SB(sb)->s_es; 677 es = EXT4_SB(sb)->s_es;
@@ -796,7 +691,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
796 continue; 691 continue;
797 692
798 x = ext4_count_free(bitmap_bh, sb->s_blocksize); 693 x = ext4_count_free(bitmap_bh, sb->s_blocksize);
799 printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", 694 printk(KERN_DEBUG "group %lu: stored = %d, counted = %u\n",
800 i, le16_to_cpu(gdp->bg_free_blocks_count), x); 695 i, le16_to_cpu(gdp->bg_free_blocks_count), x);
801 bitmap_count += x; 696 bitmap_count += x;
802 } 697 }
@@ -812,7 +707,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
812 gdp = ext4_get_group_desc(sb, i, NULL); 707 gdp = ext4_get_group_desc(sb, i, NULL);
813 if (!gdp) 708 if (!gdp)
814 continue; 709 continue;
815 desc_count += le16_to_cpu(gdp->bg_free_blocks_count); 710 desc_count += ext4_free_blks_count(sb, gdp);
816 } 711 }
817 712
818 return desc_count; 713 return desc_count;
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 0a7a6663c190..fa3af81ac565 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -15,10 +15,9 @@
15 15
16static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; 16static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
17 17
18unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars) 18unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars)
19{ 19{
20 unsigned int i; 20 unsigned int i, sum = 0;
21 unsigned long sum = 0;
22 21
23 if (!map) 22 if (!map)
24 return 0; 23 return 0;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index fed5b610df5a..2df2e40b01af 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -64,7 +64,7 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
64int ext4_check_dir_entry(const char *function, struct inode *dir, 64int ext4_check_dir_entry(const char *function, struct inode *dir,
65 struct ext4_dir_entry_2 *de, 65 struct ext4_dir_entry_2 *de,
66 struct buffer_head *bh, 66 struct buffer_head *bh,
67 unsigned long offset) 67 unsigned int offset)
68{ 68{
69 const char *error_msg = NULL; 69 const char *error_msg = NULL;
70 const int rlen = ext4_rec_len_from_disk(de->rec_len); 70 const int rlen = ext4_rec_len_from_disk(de->rec_len);
@@ -84,9 +84,9 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
84 if (error_msg != NULL) 84 if (error_msg != NULL)
85 ext4_error(dir->i_sb, function, 85 ext4_error(dir->i_sb, function,
86 "bad entry in directory #%lu: %s - " 86 "bad entry in directory #%lu: %s - "
87 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", 87 "offset=%u, inode=%u, rec_len=%d, name_len=%d",
88 dir->i_ino, error_msg, offset, 88 dir->i_ino, error_msg, offset,
89 (unsigned long) le32_to_cpu(de->inode), 89 le32_to_cpu(de->inode),
90 rlen, de->name_len); 90 rlen, de->name_len);
91 return error_msg == NULL ? 1 : 0; 91 return error_msg == NULL ? 1 : 0;
92} 92}
@@ -95,7 +95,7 @@ static int ext4_readdir(struct file *filp,
95 void *dirent, filldir_t filldir) 95 void *dirent, filldir_t filldir)
96{ 96{
97 int error = 0; 97 int error = 0;
98 unsigned long offset; 98 unsigned int offset;
99 int i, stored; 99 int i, stored;
100 struct ext4_dir_entry_2 *de; 100 struct ext4_dir_entry_2 *de;
101 struct super_block *sb; 101 struct super_block *sb;
@@ -405,7 +405,7 @@ static int call_filldir(struct file *filp, void *dirent,
405 sb = inode->i_sb; 405 sb = inode->i_sb;
406 406
407 if (!fname) { 407 if (!fname) {
408 printk(KERN_ERR "ext4: call_filldir: called with " 408 printk(KERN_ERR "EXT4-fs: call_filldir: called with "
409 "null fname?!?\n"); 409 "null fname?!?\n");
410 return 0; 410 return 0;
411 } 411 }
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0537c827024..c668e4377d76 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -19,6 +19,7 @@
19#include <linux/types.h> 19#include <linux/types.h>
20#include <linux/blkdev.h> 20#include <linux/blkdev.h>
21#include <linux/magic.h> 21#include <linux/magic.h>
22#include <linux/jbd2.h>
22#include "ext4_i.h" 23#include "ext4_i.h"
23 24
24/* 25/*
@@ -94,9 +95,9 @@ struct ext4_allocation_request {
94 /* phys. block for ^^^ */ 95 /* phys. block for ^^^ */
95 ext4_fsblk_t pright; 96 ext4_fsblk_t pright;
96 /* how many blocks we want to allocate */ 97 /* how many blocks we want to allocate */
97 unsigned long len; 98 unsigned int len;
98 /* flags. see above EXT4_MB_HINT_* */ 99 /* flags. see above EXT4_MB_HINT_* */
99 unsigned long flags; 100 unsigned int flags;
100}; 101};
101 102
102/* 103/*
@@ -156,12 +157,12 @@ struct ext4_group_desc
156 __le32 bg_block_bitmap_lo; /* Blocks bitmap block */ 157 __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
157 __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */ 158 __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
158 __le32 bg_inode_table_lo; /* Inodes table block */ 159 __le32 bg_inode_table_lo; /* Inodes table block */
159 __le16 bg_free_blocks_count; /* Free blocks count */ 160 __le16 bg_free_blocks_count_lo;/* Free blocks count */
160 __le16 bg_free_inodes_count; /* Free inodes count */ 161 __le16 bg_free_inodes_count_lo;/* Free inodes count */
161 __le16 bg_used_dirs_count; /* Directories count */ 162 __le16 bg_used_dirs_count_lo; /* Directories count */
162 __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ 163 __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
163 __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ 164 __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
164 __le16 bg_itable_unused; /* Unused inodes count */ 165 __le16 bg_itable_unused_lo; /* Unused inodes count */
165 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ 166 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
166 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ 167 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
167 __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ 168 __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
@@ -169,7 +170,7 @@ struct ext4_group_desc
169 __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */ 170 __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
170 __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ 171 __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
171 __le16 bg_used_dirs_count_hi; /* Directories count MSB */ 172 __le16 bg_used_dirs_count_hi; /* Directories count MSB */
172 __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ 173 __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
173 __u32 bg_reserved2[3]; 174 __u32 bg_reserved2[3];
174}; 175};
175 176
@@ -328,6 +329,7 @@ struct ext4_mount_options {
328 uid_t s_resuid; 329 uid_t s_resuid;
329 gid_t s_resgid; 330 gid_t s_resgid;
330 unsigned long s_commit_interval; 331 unsigned long s_commit_interval;
332 u32 s_min_batch_time, s_max_batch_time;
331#ifdef CONFIG_QUOTA 333#ifdef CONFIG_QUOTA
332 int s_jquota_fmt; 334 int s_jquota_fmt;
333 char *s_qf_names[MAXQUOTAS]; 335 char *s_qf_names[MAXQUOTAS];
@@ -534,7 +536,6 @@ do { \
534#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ 536#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
535#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ 537#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
536#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ 538#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
537#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */
538#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 539#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
539#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 540#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
540#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 541#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
@@ -726,11 +727,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
726 */ 727 */
727 728
728#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ 729#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
729 (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) 730 ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0)
730#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ 731#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
731 (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) 732 ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0)
732#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ 733#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
733 (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) 734 ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0)
734#define EXT4_SET_COMPAT_FEATURE(sb,mask) \ 735#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
735 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) 736 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
736#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ 737#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
@@ -806,6 +807,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
806#define EXT4_DEFM_JMODE_WBACK 0x0060 807#define EXT4_DEFM_JMODE_WBACK 0x0060
807 808
808/* 809/*
810 * Default journal batch times
811 */
812#define EXT4_DEF_MIN_BATCH_TIME 0
813#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */
814
815/*
809 * Structure of a directory entry 816 * Structure of a directory entry
810 */ 817 */
811#define EXT4_NAME_LEN 255 818#define EXT4_NAME_LEN 255
@@ -891,6 +898,9 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len)
891#define DX_HASH_LEGACY 0 898#define DX_HASH_LEGACY 0
892#define DX_HASH_HALF_MD4 1 899#define DX_HASH_HALF_MD4 1
893#define DX_HASH_TEA 2 900#define DX_HASH_TEA 2
901#define DX_HASH_LEGACY_UNSIGNED 3
902#define DX_HASH_HALF_MD4_UNSIGNED 4
903#define DX_HASH_TEA_UNSIGNED 5
894 904
895#ifdef __KERNEL__ 905#ifdef __KERNEL__
896 906
@@ -955,7 +965,7 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
955#define ERR_BAD_DX_DIR -75000 965#define ERR_BAD_DX_DIR -75000
956 966
957void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 967void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
958 unsigned long *blockgrpp, ext4_grpblk_t *offsetp); 968 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
959 969
960extern struct proc_dir_entry *ext4_proc_root; 970extern struct proc_dir_entry *ext4_proc_root;
961 971
@@ -987,6 +997,9 @@ do { \
987# define ATTRIB_NORET __attribute__((noreturn)) 997# define ATTRIB_NORET __attribute__((noreturn))
988# define NORET_AND noreturn, 998# define NORET_AND noreturn,
989 999
1000/* bitmap.c */
1001extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
1002
990/* balloc.c */ 1003/* balloc.c */
991extern unsigned int ext4_block_group(struct super_block *sb, 1004extern unsigned int ext4_block_group(struct super_block *sb,
992 ext4_fsblk_t blocknr); 1005 ext4_fsblk_t blocknr);
@@ -995,20 +1008,14 @@ extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
995extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); 1008extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
996extern unsigned long ext4_bg_num_gdb(struct super_block *sb, 1009extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
997 ext4_group_t group); 1010 ext4_group_t group);
998extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
999 ext4_fsblk_t goal, int *errp);
1000extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, 1011extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1001 ext4_fsblk_t goal, unsigned long *count, int *errp); 1012 ext4_fsblk_t goal, unsigned long *count, int *errp);
1002extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
1003 ext4_lblk_t iblock, ext4_fsblk_t goal,
1004 unsigned long *count, int *errp);
1005extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1013extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1006extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1014extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1007extern void ext4_free_blocks(handle_t *handle, struct inode *inode, 1015extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1008 ext4_fsblk_t block, unsigned long count, int metadata); 1016 ext4_fsblk_t block, unsigned long count, int metadata);
1009extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, 1017extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
1010 ext4_fsblk_t block, unsigned long count, 1018 ext4_fsblk_t block, unsigned long count);
1011 unsigned long *pdquot_freed_blocks);
1012extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1019extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
1013extern void ext4_check_blocks_bitmap(struct super_block *); 1020extern void ext4_check_blocks_bitmap(struct super_block *);
1014extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 1021extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
@@ -1019,7 +1026,7 @@ extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
1019/* dir.c */ 1026/* dir.c */
1020extern int ext4_check_dir_entry(const char *, struct inode *, 1027extern int ext4_check_dir_entry(const char *, struct inode *,
1021 struct ext4_dir_entry_2 *, 1028 struct ext4_dir_entry_2 *,
1022 struct buffer_head *, unsigned long); 1029 struct buffer_head *, unsigned int);
1023extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, 1030extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
1024 __u32 minor_hash, 1031 __u32 minor_hash,
1025 struct ext4_dir_entry_2 *dirent); 1032 struct ext4_dir_entry_2 *dirent);
@@ -1039,7 +1046,6 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1039extern unsigned long ext4_count_free_inodes(struct super_block *); 1046extern unsigned long ext4_count_free_inodes(struct super_block *);
1040extern unsigned long ext4_count_dirs(struct super_block *); 1047extern unsigned long ext4_count_dirs(struct super_block *);
1041extern void ext4_check_inodes_bitmap(struct super_block *); 1048extern void ext4_check_inodes_bitmap(struct super_block *);
1042extern unsigned long ext4_count_free(struct buffer_head *, unsigned);
1043 1049
1044/* mballoc.c */ 1050/* mballoc.c */
1045extern long ext4_mb_stats; 1051extern long ext4_mb_stats;
@@ -1054,12 +1060,13 @@ extern int __init init_ext4_mballoc(void);
1054extern void exit_ext4_mballoc(void); 1060extern void exit_ext4_mballoc(void);
1055extern void ext4_mb_free_blocks(handle_t *, struct inode *, 1061extern void ext4_mb_free_blocks(handle_t *, struct inode *,
1056 unsigned long, unsigned long, int, unsigned long *); 1062 unsigned long, unsigned long, int, unsigned long *);
1057extern int ext4_mb_add_more_groupinfo(struct super_block *sb, 1063extern int ext4_mb_add_groupinfo(struct super_block *sb,
1058 ext4_group_t i, struct ext4_group_desc *desc); 1064 ext4_group_t i, struct ext4_group_desc *desc);
1059extern void ext4_mb_update_group_info(struct ext4_group_info *grp, 1065extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
1060 ext4_grpblk_t add); 1066 ext4_grpblk_t add);
1061 1067extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
1062 1068extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
1069 ext4_group_t, int);
1063/* inode.c */ 1070/* inode.c */
1064int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, 1071int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1065 struct buffer_head *bh, ext4_fsblk_t blocknr); 1072 struct buffer_head *bh, ext4_fsblk_t blocknr);
@@ -1069,10 +1076,6 @@ struct buffer_head *ext4_bread(handle_t *, struct inode *,
1069 ext4_lblk_t, int, int *); 1076 ext4_lblk_t, int, int *);
1070int ext4_get_block(struct inode *inode, sector_t iblock, 1077int ext4_get_block(struct inode *inode, sector_t iblock,
1071 struct buffer_head *bh_result, int create); 1078 struct buffer_head *bh_result, int create);
1072int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
1073 ext4_lblk_t iblock, unsigned long maxblocks,
1074 struct buffer_head *bh_result,
1075 int create, int extend_disksize);
1076 1079
1077extern struct inode *ext4_iget(struct super_block *, unsigned long); 1080extern struct inode *ext4_iget(struct super_block *, unsigned long);
1078extern int ext4_write_inode(struct inode *, int); 1081extern int ext4_write_inode(struct inode *, int);
@@ -1123,6 +1126,9 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...)
1123 __attribute__ ((format (printf, 3, 4))); 1126 __attribute__ ((format (printf, 3, 4)));
1124extern void ext4_warning(struct super_block *, const char *, const char *, ...) 1127extern void ext4_warning(struct super_block *, const char *, const char *, ...)
1125 __attribute__ ((format (printf, 3, 4))); 1128 __attribute__ ((format (printf, 3, 4)));
1129extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
1130 const char *, const char *, ...)
1131 __attribute__ ((format (printf, 4, 5)));
1126extern void ext4_update_dynamic_rev(struct super_block *sb); 1132extern void ext4_update_dynamic_rev(struct super_block *sb);
1127extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, 1133extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
1128 __u32 compat); 1134 __u32 compat);
@@ -1136,12 +1142,28 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
1136 struct ext4_group_desc *bg); 1142 struct ext4_group_desc *bg);
1137extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, 1143extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
1138 struct ext4_group_desc *bg); 1144 struct ext4_group_desc *bg);
1145extern __u32 ext4_free_blks_count(struct super_block *sb,
1146 struct ext4_group_desc *bg);
1147extern __u32 ext4_free_inodes_count(struct super_block *sb,
1148 struct ext4_group_desc *bg);
1149extern __u32 ext4_used_dirs_count(struct super_block *sb,
1150 struct ext4_group_desc *bg);
1151extern __u32 ext4_itable_unused_count(struct super_block *sb,
1152 struct ext4_group_desc *bg);
1139extern void ext4_block_bitmap_set(struct super_block *sb, 1153extern void ext4_block_bitmap_set(struct super_block *sb,
1140 struct ext4_group_desc *bg, ext4_fsblk_t blk); 1154 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1141extern void ext4_inode_bitmap_set(struct super_block *sb, 1155extern void ext4_inode_bitmap_set(struct super_block *sb,
1142 struct ext4_group_desc *bg, ext4_fsblk_t blk); 1156 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1143extern void ext4_inode_table_set(struct super_block *sb, 1157extern void ext4_inode_table_set(struct super_block *sb,
1144 struct ext4_group_desc *bg, ext4_fsblk_t blk); 1158 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1159extern void ext4_free_blks_set(struct super_block *sb,
1160 struct ext4_group_desc *bg, __u32 count);
1161extern void ext4_free_inodes_set(struct super_block *sb,
1162 struct ext4_group_desc *bg, __u32 count);
1163extern void ext4_used_dirs_set(struct super_block *sb,
1164 struct ext4_group_desc *bg, __u32 count);
1165extern void ext4_itable_unused_set(struct super_block *sb,
1166 struct ext4_group_desc *bg, __u32 count);
1145 1167
1146static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) 1168static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
1147{ 1169{
@@ -1225,11 +1247,11 @@ do { \
1225} while (0) 1247} while (0)
1226 1248
1227#ifdef CONFIG_SMP 1249#ifdef CONFIG_SMP
1228/* Each CPU can accumulate FBC_BATCH blocks in their local 1250/* Each CPU can accumulate percpu_counter_batch blocks in their local
1229 * counters. So we need to make sure we have free blocks more 1251 * counters. So we need to make sure we have free blocks more
1230 * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. 1252 * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
1231 */ 1253 */
1232#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) 1254#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
1233#else 1255#else
1234#define EXT4_FREEBLOCKS_WATERMARK 0 1256#define EXT4_FREEBLOCKS_WATERMARK 0
1235#endif 1257#endif
@@ -1246,6 +1268,50 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
1246 return ; 1268 return ;
1247} 1269}
1248 1270
1271struct ext4_group_info {
1272 unsigned long bb_state;
1273 struct rb_root bb_free_root;
1274 unsigned short bb_first_free;
1275 unsigned short bb_free;
1276 unsigned short bb_fragments;
1277 struct list_head bb_prealloc_list;
1278#ifdef DOUBLE_CHECK
1279 void *bb_bitmap;
1280#endif
1281 struct rw_semaphore alloc_sem;
1282 unsigned short bb_counters[];
1283};
1284
1285#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
1286#define EXT4_GROUP_INFO_LOCKED_BIT 1
1287
1288#define EXT4_MB_GRP_NEED_INIT(grp) \
1289 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
1290
1291static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
1292{
1293 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
1294
1295 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
1296}
1297
1298static inline void ext4_unlock_group(struct super_block *sb,
1299 ext4_group_t group)
1300{
1301 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
1302
1303 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
1304}
1305
1306static inline int ext4_is_group_locked(struct super_block *sb,
1307 ext4_group_t group)
1308{
1309 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
1310
1311 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
1312 &(grinfo->bb_state));
1313}
1314
1249/* 1315/*
1250 * Inodes and files operations 1316 * Inodes and files operations
1251 */ 1317 */
@@ -1271,18 +1337,38 @@ extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
1271extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, 1337extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
1272 int chunk); 1338 int chunk);
1273extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 1339extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1274 ext4_lblk_t iblock, 1340 ext4_lblk_t iblock, unsigned int max_blocks,
1275 unsigned long max_blocks, struct buffer_head *bh_result, 1341 struct buffer_head *bh_result,
1276 int create, int extend_disksize); 1342 int create, int extend_disksize);
1277extern void ext4_ext_truncate(struct inode *); 1343extern void ext4_ext_truncate(struct inode *);
1278extern void ext4_ext_init(struct super_block *); 1344extern void ext4_ext_init(struct super_block *);
1279extern void ext4_ext_release(struct super_block *); 1345extern void ext4_ext_release(struct super_block *);
1280extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, 1346extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1281 loff_t len); 1347 loff_t len);
1282extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, 1348extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
1283 sector_t block, unsigned long max_blocks, 1349 sector_t block, unsigned int max_blocks,
1284 struct buffer_head *bh, int create, 1350 struct buffer_head *bh, int create,
1285 int extend_disksize, int flag); 1351 int extend_disksize, int flag);
1352extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1353 __u64 start, __u64 len);
1354
1355/*
1356 * Add new method to test whether block and inode bitmaps are properly
1357 * initialized. With uninit_bg reading the block from disk is not enough
1358 * to mark the bitmap uptodate. We need to also zero-out the bitmap
1359 */
1360#define BH_BITMAP_UPTODATE BH_JBDPrivateStart
1361
1362static inline int bitmap_uptodate(struct buffer_head *bh)
1363{
1364 return (buffer_uptodate(bh) &&
1365 test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state));
1366}
1367static inline void set_bitmap_uptodate(struct buffer_head *bh)
1368{
1369 set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
1370}
1371
1286#endif /* __KERNEL__ */ 1372#endif /* __KERNEL__ */
1287 1373
1288#endif /* _EXT4_H */ 1374#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index bec7ce59fc0d..18cb67b2cbbc 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -194,11 +194,6 @@ static inline unsigned short ext_depth(struct inode *inode)
194 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); 194 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
195} 195}
196 196
197static inline void ext4_ext_tree_changed(struct inode *inode)
198{
199 EXT4_I(inode)->i_ext_generation++;
200}
201
202static inline void 197static inline void
203ext4_ext_invalidate_cache(struct inode *inode) 198ext4_ext_invalidate_cache(struct inode *inode)
204{ 199{
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index 5c124c0ac6d3..e69acc16f5c4 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -31,7 +31,7 @@ typedef unsigned long long ext4_fsblk_t;
31typedef __u32 ext4_lblk_t; 31typedef __u32 ext4_lblk_t;
32 32
33/* data type for block group number */ 33/* data type for block group number */
34typedef unsigned long ext4_group_t; 34typedef unsigned int ext4_group_t;
35 35
36#define rsv_start rsv_window._rsv_start 36#define rsv_start rsv_window._rsv_start
37#define rsv_end rsv_window._rsv_end 37#define rsv_end rsv_window._rsv_end
@@ -100,9 +100,6 @@ struct ext4_inode_info {
100 */ 100 */
101 loff_t i_disksize; 101 loff_t i_disksize;
102 102
103 /* on-disk additional length */
104 __u16 i_extra_isize;
105
106 /* 103 /*
107 * i_data_sem is for serialising ext4_truncate() against 104 * i_data_sem is for serialising ext4_truncate() against
108 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's 105 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
@@ -117,7 +114,6 @@ struct ext4_inode_info {
117 struct inode vfs_inode; 114 struct inode vfs_inode;
118 struct jbd2_inode jinode; 115 struct jbd2_inode jinode;
119 116
120 unsigned long i_ext_generation;
121 struct ext4_ext_cache i_cached_extent; 117 struct ext4_ext_cache i_cached_extent;
122 /* 118 /*
123 * File creation time. Its function is same as that of 119 * File creation time. Its function is same as that of
@@ -130,10 +126,14 @@ struct ext4_inode_info {
130 spinlock_t i_prealloc_lock; 126 spinlock_t i_prealloc_lock;
131 127
132 /* allocation reservation info for delalloc */ 128 /* allocation reservation info for delalloc */
133 unsigned long i_reserved_data_blocks; 129 unsigned int i_reserved_data_blocks;
134 unsigned long i_reserved_meta_blocks; 130 unsigned int i_reserved_meta_blocks;
135 unsigned long i_allocated_meta_blocks; 131 unsigned int i_allocated_meta_blocks;
136 unsigned short i_delalloc_reserved_flag; 132 unsigned short i_delalloc_reserved_flag;
133
134 /* on-disk additional length */
135 __u16 i_extra_isize;
136
137 spinlock_t i_block_reservation_lock; 137 spinlock_t i_block_reservation_lock;
138}; 138};
139 139
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index c75384b34f2c..ad13a84644e1 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -7,53 +7,96 @@
7int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 7int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 8 struct buffer_head *bh)
9{ 9{
10 int err = jbd2_journal_get_undo_access(handle, bh); 10 int err = 0;
11 if (err) 11
12 ext4_journal_abort_handle(where, __func__, bh, handle, err); 12 if (ext4_handle_valid(handle)) {
13 err = jbd2_journal_get_undo_access(handle, bh);
14 if (err)
15 ext4_journal_abort_handle(where, __func__, bh,
16 handle, err);
17 }
13 return err; 18 return err;
14} 19}
15 20
16int __ext4_journal_get_write_access(const char *where, handle_t *handle, 21int __ext4_journal_get_write_access(const char *where, handle_t *handle,
17 struct buffer_head *bh) 22 struct buffer_head *bh)
18{ 23{
19 int err = jbd2_journal_get_write_access(handle, bh); 24 int err = 0;
20 if (err) 25
21 ext4_journal_abort_handle(where, __func__, bh, handle, err); 26 if (ext4_handle_valid(handle)) {
27 err = jbd2_journal_get_write_access(handle, bh);
28 if (err)
29 ext4_journal_abort_handle(where, __func__, bh,
30 handle, err);
31 }
22 return err; 32 return err;
23} 33}
24 34
25int __ext4_journal_forget(const char *where, handle_t *handle, 35int __ext4_journal_forget(const char *where, handle_t *handle,
26 struct buffer_head *bh) 36 struct buffer_head *bh)
27{ 37{
28 int err = jbd2_journal_forget(handle, bh); 38 int err = 0;
29 if (err) 39
30 ext4_journal_abort_handle(where, __func__, bh, handle, err); 40 if (ext4_handle_valid(handle)) {
41 err = jbd2_journal_forget(handle, bh);
42 if (err)
43 ext4_journal_abort_handle(where, __func__, bh,
44 handle, err);
45 }
31 return err; 46 return err;
32} 47}
33 48
34int __ext4_journal_revoke(const char *where, handle_t *handle, 49int __ext4_journal_revoke(const char *where, handle_t *handle,
35 ext4_fsblk_t blocknr, struct buffer_head *bh) 50 ext4_fsblk_t blocknr, struct buffer_head *bh)
36{ 51{
37 int err = jbd2_journal_revoke(handle, blocknr, bh); 52 int err = 0;
38 if (err) 53
39 ext4_journal_abort_handle(where, __func__, bh, handle, err); 54 if (ext4_handle_valid(handle)) {
55 err = jbd2_journal_revoke(handle, blocknr, bh);
56 if (err)
57 ext4_journal_abort_handle(where, __func__, bh,
58 handle, err);
59 }
40 return err; 60 return err;
41} 61}
42 62
43int __ext4_journal_get_create_access(const char *where, 63int __ext4_journal_get_create_access(const char *where,
44 handle_t *handle, struct buffer_head *bh) 64 handle_t *handle, struct buffer_head *bh)
45{ 65{
46 int err = jbd2_journal_get_create_access(handle, bh); 66 int err = 0;
47 if (err) 67
48 ext4_journal_abort_handle(where, __func__, bh, handle, err); 68 if (ext4_handle_valid(handle)) {
69 err = jbd2_journal_get_create_access(handle, bh);
70 if (err)
71 ext4_journal_abort_handle(where, __func__, bh,
72 handle, err);
73 }
49 return err; 74 return err;
50} 75}
51 76
52int __ext4_journal_dirty_metadata(const char *where, 77int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
53 handle_t *handle, struct buffer_head *bh) 78 struct inode *inode, struct buffer_head *bh)
54{ 79{
55 int err = jbd2_journal_dirty_metadata(handle, bh); 80 int err = 0;
56 if (err) 81
57 ext4_journal_abort_handle(where, __func__, bh, handle, err); 82 if (ext4_handle_valid(handle)) {
83 err = jbd2_journal_dirty_metadata(handle, bh);
84 if (err)
85 ext4_journal_abort_handle(where, __func__, bh,
86 handle, err);
87 } else {
88 mark_buffer_dirty(bh);
89 if (inode && inode_needs_sync(inode)) {
90 sync_dirty_buffer(bh);
91 if (buffer_req(bh) && !buffer_uptodate(bh)) {
92 ext4_error(inode->i_sb, __func__,
93 "IO error syncing inode, "
94 "inode=%lu, block=%llu",
95 inode->i_ino,
96 (unsigned long long) bh->b_blocknr);
97 err = -EIO;
98 }
99 }
100 }
58 return err; 101 return err;
59} 102}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b455c685a98b..be2f426f6805 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -32,8 +32,8 @@
32 * 5 levels of tree + root which are stored in the inode. */ 32 * 5 levels of tree + root which are stored in the inode. */
33 33
34#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ 34#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
35 (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ 35 (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
36 || test_opt(sb, EXTENTS) ? 27U : 8U) 36 ? 27U : 8U)
37 37
38/* Extended attribute operations touch at most two data buffers, 38/* Extended attribute operations touch at most two data buffers,
39 * two bitmap buffers, and two group summaries, in addition to the inode 39 * two bitmap buffers, and two group summaries, in addition to the inode
@@ -122,12 +122,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
122 * been done yet. 122 * been done yet.
123 */ 123 */
124 124
125static inline void ext4_journal_release_buffer(handle_t *handle,
126 struct buffer_head *bh)
127{
128 jbd2_journal_release_buffer(handle, bh);
129}
130
131void ext4_journal_abort_handle(const char *caller, const char *err_fn, 125void ext4_journal_abort_handle(const char *caller, const char *err_fn,
132 struct buffer_head *bh, handle_t *handle, int err); 126 struct buffer_head *bh, handle_t *handle, int err);
133 127
@@ -146,8 +140,8 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
146int __ext4_journal_get_create_access(const char *where, 140int __ext4_journal_get_create_access(const char *where,
147 handle_t *handle, struct buffer_head *bh); 141 handle_t *handle, struct buffer_head *bh);
148 142
149int __ext4_journal_dirty_metadata(const char *where, 143int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
150 handle_t *handle, struct buffer_head *bh); 144 struct inode *inode, struct buffer_head *bh);
151 145
152#define ext4_journal_get_undo_access(handle, bh) \ 146#define ext4_journal_get_undo_access(handle, bh) \
153 __ext4_journal_get_undo_access(__func__, (handle), (bh)) 147 __ext4_journal_get_undo_access(__func__, (handle), (bh))
@@ -157,14 +151,57 @@ int __ext4_journal_dirty_metadata(const char *where,
157 __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) 151 __ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
158#define ext4_journal_get_create_access(handle, bh) \ 152#define ext4_journal_get_create_access(handle, bh) \
159 __ext4_journal_get_create_access(__func__, (handle), (bh)) 153 __ext4_journal_get_create_access(__func__, (handle), (bh))
160#define ext4_journal_dirty_metadata(handle, bh) \
161 __ext4_journal_dirty_metadata(__func__, (handle), (bh))
162#define ext4_journal_forget(handle, bh) \ 154#define ext4_journal_forget(handle, bh) \
163 __ext4_journal_forget(__func__, (handle), (bh)) 155 __ext4_journal_forget(__func__, (handle), (bh))
156#define ext4_handle_dirty_metadata(handle, inode, bh) \
157 __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))
164 158
165handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); 159handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
166int __ext4_journal_stop(const char *where, handle_t *handle); 160int __ext4_journal_stop(const char *where, handle_t *handle);
167 161
162#define EXT4_NOJOURNAL_HANDLE ((handle_t *) 0x1)
163
164static inline int ext4_handle_valid(handle_t *handle)
165{
166 if (handle == EXT4_NOJOURNAL_HANDLE)
167 return 0;
168 return 1;
169}
170
171static inline void ext4_handle_sync(handle_t *handle)
172{
173 if (ext4_handle_valid(handle))
174 handle->h_sync = 1;
175}
176
177static inline void ext4_handle_release_buffer(handle_t *handle,
178 struct buffer_head *bh)
179{
180 if (ext4_handle_valid(handle))
181 jbd2_journal_release_buffer(handle, bh);
182}
183
184static inline int ext4_handle_is_aborted(handle_t *handle)
185{
186 if (ext4_handle_valid(handle))
187 return is_handle_aborted(handle);
188 return 0;
189}
190
191static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
192{
193 if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
194 return 0;
195 return 1;
196}
197
198static inline void ext4_journal_release_buffer(handle_t *handle,
199 struct buffer_head *bh)
200{
201 if (ext4_handle_valid(handle))
202 jbd2_journal_release_buffer(handle, bh);
203}
204
168static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) 205static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
169{ 206{
170 return ext4_journal_start_sb(inode->i_sb, nblocks); 207 return ext4_journal_start_sb(inode->i_sb, nblocks);
@@ -180,27 +217,37 @@ static inline handle_t *ext4_journal_current_handle(void)
180 217
181static inline int ext4_journal_extend(handle_t *handle, int nblocks) 218static inline int ext4_journal_extend(handle_t *handle, int nblocks)
182{ 219{
183 return jbd2_journal_extend(handle, nblocks); 220 if (ext4_handle_valid(handle))
221 return jbd2_journal_extend(handle, nblocks);
222 return 0;
184} 223}
185 224
186static inline int ext4_journal_restart(handle_t *handle, int nblocks) 225static inline int ext4_journal_restart(handle_t *handle, int nblocks)
187{ 226{
188 return jbd2_journal_restart(handle, nblocks); 227 if (ext4_handle_valid(handle))
228 return jbd2_journal_restart(handle, nblocks);
229 return 0;
189} 230}
190 231
191static inline int ext4_journal_blocks_per_page(struct inode *inode) 232static inline int ext4_journal_blocks_per_page(struct inode *inode)
192{ 233{
193 return jbd2_journal_blocks_per_page(inode); 234 if (EXT4_JOURNAL(inode) != NULL)
235 return jbd2_journal_blocks_per_page(inode);
236 return 0;
194} 237}
195 238
196static inline int ext4_journal_force_commit(journal_t *journal) 239static inline int ext4_journal_force_commit(journal_t *journal)
197{ 240{
198 return jbd2_journal_force_commit(journal); 241 if (journal)
242 return jbd2_journal_force_commit(journal);
243 return 0;
199} 244}
200 245
201static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) 246static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
202{ 247{
203 return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode); 248 if (ext4_handle_valid(handle))
249 return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
250 return 0;
204} 251}
205 252
206/* super.c */ 253/* super.c */
@@ -208,6 +255,8 @@ int ext4_force_commit(struct super_block *sb);
208 255
209static inline int ext4_should_journal_data(struct inode *inode) 256static inline int ext4_should_journal_data(struct inode *inode)
210{ 257{
258 if (EXT4_JOURNAL(inode) == NULL)
259 return 0;
211 if (!S_ISREG(inode->i_mode)) 260 if (!S_ISREG(inode->i_mode))
212 return 1; 261 return 1;
213 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 262 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -219,6 +268,8 @@ static inline int ext4_should_journal_data(struct inode *inode)
219 268
220static inline int ext4_should_order_data(struct inode *inode) 269static inline int ext4_should_order_data(struct inode *inode)
221{ 270{
271 if (EXT4_JOURNAL(inode) == NULL)
272 return 0;
222 if (!S_ISREG(inode->i_mode)) 273 if (!S_ISREG(inode->i_mode))
223 return 0; 274 return 0;
224 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) 275 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
@@ -230,6 +281,8 @@ static inline int ext4_should_order_data(struct inode *inode)
230 281
231static inline int ext4_should_writeback_data(struct inode *inode) 282static inline int ext4_should_writeback_data(struct inode *inode)
232{ 283{
284 if (EXT4_JOURNAL(inode) == NULL)
285 return 0;
233 if (!S_ISREG(inode->i_mode)) 286 if (!S_ISREG(inode->i_mode))
234 return 0; 287 return 0;
235 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) 288 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 445fde603df8..039b6ea1a042 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -57,6 +57,7 @@ struct ext4_sb_info {
57 u32 s_next_generation; 57 u32 s_next_generation;
58 u32 s_hash_seed[4]; 58 u32 s_hash_seed[4];
59 int s_def_hash_version; 59 int s_def_hash_version;
60 int s_hash_unsigned; /* 3 if hash should be unsigned, 0 if not */
60 struct percpu_counter s_freeblocks_counter; 61 struct percpu_counter s_freeblocks_counter;
61 struct percpu_counter s_freeinodes_counter; 62 struct percpu_counter s_freeinodes_counter;
62 struct percpu_counter s_dirs_counter; 63 struct percpu_counter s_dirs_counter;
@@ -73,6 +74,8 @@ struct ext4_sb_info {
73 struct journal_s *s_journal; 74 struct journal_s *s_journal;
74 struct list_head s_orphan; 75 struct list_head s_orphan;
75 unsigned long s_commit_interval; 76 unsigned long s_commit_interval;
77 u32 s_max_batch_time;
78 u32 s_min_batch_time;
76 struct block_device *journal_bdev; 79 struct block_device *journal_bdev;
77#ifdef CONFIG_JBD2_DEBUG 80#ifdef CONFIG_JBD2_DEBUG
78 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ 81 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
@@ -101,7 +104,8 @@ struct ext4_sb_info {
101 spinlock_t s_reserve_lock; 104 spinlock_t s_reserve_lock;
102 spinlock_t s_md_lock; 105 spinlock_t s_md_lock;
103 tid_t s_last_transaction; 106 tid_t s_last_transaction;
104 unsigned short *s_mb_offsets, *s_mb_maxs; 107 unsigned short *s_mb_offsets;
108 unsigned int *s_mb_maxs;
105 109
106 /* tunables */ 110 /* tunables */
107 unsigned long s_stripe; 111 unsigned long s_stripe;
@@ -146,4 +150,10 @@ struct ext4_sb_info {
146 struct flex_groups *s_flex_groups; 150 struct flex_groups *s_flex_groups;
147}; 151};
148 152
153static inline spinlock_t *
154sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
155{
156 return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
157}
158
149#endif /* _EXT4_SB */ 159#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ea2ce3c0ae66..54bf0623a9ae 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -97,6 +97,8 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
97{ 97{
98 int err; 98 int err;
99 99
100 if (!ext4_handle_valid(handle))
101 return 0;
100 if (handle->h_buffer_credits > needed) 102 if (handle->h_buffer_credits > needed)
101 return 0; 103 return 0;
102 err = ext4_journal_extend(handle, needed); 104 err = ext4_journal_extend(handle, needed);
@@ -134,7 +136,7 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
134 int err; 136 int err;
135 if (path->p_bh) { 137 if (path->p_bh) {
136 /* path points to block */ 138 /* path points to block */
137 err = ext4_journal_dirty_metadata(handle, path->p_bh); 139 err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
138 } else { 140 } else {
139 /* path points to leaf/index in inode body */ 141 /* path points to leaf/index in inode body */
140 err = ext4_mark_inode_dirty(handle, inode); 142 err = ext4_mark_inode_dirty(handle, inode);
@@ -191,7 +193,7 @@ ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
191 ext4_fsblk_t goal, newblock; 193 ext4_fsblk_t goal, newblock;
192 194
193 goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); 195 goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
194 newblock = ext4_new_meta_block(handle, inode, goal, err); 196 newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err);
195 return newblock; 197 return newblock;
196} 198}
197 199
@@ -780,7 +782,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
780 set_buffer_uptodate(bh); 782 set_buffer_uptodate(bh);
781 unlock_buffer(bh); 783 unlock_buffer(bh);
782 784
783 err = ext4_journal_dirty_metadata(handle, bh); 785 err = ext4_handle_dirty_metadata(handle, inode, bh);
784 if (err) 786 if (err)
785 goto cleanup; 787 goto cleanup;
786 brelse(bh); 788 brelse(bh);
@@ -859,7 +861,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
859 set_buffer_uptodate(bh); 861 set_buffer_uptodate(bh);
860 unlock_buffer(bh); 862 unlock_buffer(bh);
861 863
862 err = ext4_journal_dirty_metadata(handle, bh); 864 err = ext4_handle_dirty_metadata(handle, inode, bh);
863 if (err) 865 if (err)
864 goto cleanup; 866 goto cleanup;
865 brelse(bh); 867 brelse(bh);
@@ -955,7 +957,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
955 set_buffer_uptodate(bh); 957 set_buffer_uptodate(bh);
956 unlock_buffer(bh); 958 unlock_buffer(bh);
957 959
958 err = ext4_journal_dirty_metadata(handle, bh); 960 err = ext4_handle_dirty_metadata(handle, inode, bh);
959 if (err) 961 if (err)
960 goto out; 962 goto out;
961 963
@@ -1160,15 +1162,13 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1160 while (--depth >= 0) { 1162 while (--depth >= 0) {
1161 ix = path[depth].p_idx; 1163 ix = path[depth].p_idx;
1162 if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) 1164 if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
1163 break; 1165 goto got_index;
1164 } 1166 }
1165 1167
1166 if (depth < 0) { 1168 /* we've gone up to the root and found no index to the right */
1167 /* we've gone up to the root and 1169 return 0;
1168 * found no index to the right */
1169 return 0;
1170 }
1171 1170
1171got_index:
1172 /* we've found index to the right, let's 1172 /* we've found index to the right, let's
1173 * follow it and find the closest allocated 1173 * follow it and find the closest allocated
1174 * block to the right */ 1174 * block to the right */
@@ -1201,7 +1201,6 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1201 *phys = ext_pblock(ex); 1201 *phys = ext_pblock(ex);
1202 put_bh(bh); 1202 put_bh(bh);
1203 return 0; 1203 return 0;
1204
1205} 1204}
1206 1205
1207/* 1206/*
@@ -1622,7 +1621,6 @@ cleanup:
1622 ext4_ext_drop_refs(npath); 1621 ext4_ext_drop_refs(npath);
1623 kfree(npath); 1622 kfree(npath);
1624 } 1623 }
1625 ext4_ext_tree_changed(inode);
1626 ext4_ext_invalidate_cache(inode); 1624 ext4_ext_invalidate_cache(inode);
1627 return err; 1625 return err;
1628} 1626}
@@ -2233,7 +2231,6 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2233 } 2231 }
2234 } 2232 }
2235out: 2233out:
2236 ext4_ext_tree_changed(inode);
2237 ext4_ext_drop_refs(path); 2234 ext4_ext_drop_refs(path);
2238 kfree(path); 2235 kfree(path);
2239 ext4_journal_stop(handle); 2236 ext4_journal_stop(handle);
@@ -2250,7 +2247,7 @@ void ext4_ext_init(struct super_block *sb)
2250 * possible initialization would be here 2247 * possible initialization would be here
2251 */ 2248 */
2252 2249
2253 if (test_opt(sb, EXTENTS)) { 2250 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2254 printk(KERN_INFO "EXT4-fs: file extents enabled"); 2251 printk(KERN_INFO "EXT4-fs: file extents enabled");
2255#ifdef AGGRESSIVE_TEST 2252#ifdef AGGRESSIVE_TEST
2256 printk(", aggressive tests"); 2253 printk(", aggressive tests");
@@ -2275,7 +2272,7 @@ void ext4_ext_init(struct super_block *sb)
2275 */ 2272 */
2276void ext4_ext_release(struct super_block *sb) 2273void ext4_ext_release(struct super_block *sb)
2277{ 2274{
2278 if (!test_opt(sb, EXTENTS)) 2275 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
2279 return; 2276 return;
2280 2277
2281#ifdef EXTENTS_STATS 2278#ifdef EXTENTS_STATS
@@ -2380,7 +2377,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2380 struct inode *inode, 2377 struct inode *inode,
2381 struct ext4_ext_path *path, 2378 struct ext4_ext_path *path,
2382 ext4_lblk_t iblock, 2379 ext4_lblk_t iblock,
2383 unsigned long max_blocks) 2380 unsigned int max_blocks)
2384{ 2381{
2385 struct ext4_extent *ex, newex, orig_ex; 2382 struct ext4_extent *ex, newex, orig_ex;
2386 struct ext4_extent *ex1 = NULL; 2383 struct ext4_extent *ex1 = NULL;
@@ -2536,7 +2533,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2536 */ 2533 */
2537 newdepth = ext_depth(inode); 2534 newdepth = ext_depth(inode);
2538 /* 2535 /*
2539 * update the extent length after successfull insert of the 2536 * update the extent length after successful insert of the
2540 * split extent 2537 * split extent
2541 */ 2538 */
2542 orig_ex.ee_len = cpu_to_le16(ee_len - 2539 orig_ex.ee_len = cpu_to_le16(ee_len -
@@ -2678,26 +2675,26 @@ fix_extent_len:
2678 */ 2675 */
2679int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 2676int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2680 ext4_lblk_t iblock, 2677 ext4_lblk_t iblock,
2681 unsigned long max_blocks, struct buffer_head *bh_result, 2678 unsigned int max_blocks, struct buffer_head *bh_result,
2682 int create, int extend_disksize) 2679 int create, int extend_disksize)
2683{ 2680{
2684 struct ext4_ext_path *path = NULL; 2681 struct ext4_ext_path *path = NULL;
2685 struct ext4_extent_header *eh; 2682 struct ext4_extent_header *eh;
2686 struct ext4_extent newex, *ex; 2683 struct ext4_extent newex, *ex;
2687 ext4_fsblk_t goal, newblock; 2684 ext4_fsblk_t newblock;
2688 int err = 0, depth, ret; 2685 int err = 0, depth, ret, cache_type;
2689 unsigned long allocated = 0; 2686 unsigned int allocated = 0;
2690 struct ext4_allocation_request ar; 2687 struct ext4_allocation_request ar;
2691 loff_t disksize; 2688 loff_t disksize;
2692 2689
2693 __clear_bit(BH_New, &bh_result->b_state); 2690 __clear_bit(BH_New, &bh_result->b_state);
2694 ext_debug("blocks %u/%lu requested for inode %u\n", 2691 ext_debug("blocks %u/%u requested for inode %u\n",
2695 iblock, max_blocks, inode->i_ino); 2692 iblock, max_blocks, inode->i_ino);
2696 2693
2697 /* check in cache */ 2694 /* check in cache */
2698 goal = ext4_ext_in_cache(inode, iblock, &newex); 2695 cache_type = ext4_ext_in_cache(inode, iblock, &newex);
2699 if (goal) { 2696 if (cache_type) {
2700 if (goal == EXT4_EXT_CACHE_GAP) { 2697 if (cache_type == EXT4_EXT_CACHE_GAP) {
2701 if (!create) { 2698 if (!create) {
2702 /* 2699 /*
2703 * block isn't allocated yet and 2700 * block isn't allocated yet and
@@ -2706,7 +2703,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2706 goto out2; 2703 goto out2;
2707 } 2704 }
2708 /* we should allocate requested block */ 2705 /* we should allocate requested block */
2709 } else if (goal == EXT4_EXT_CACHE_EXTENT) { 2706 } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
2710 /* block is already allocated */ 2707 /* block is already allocated */
2711 newblock = iblock 2708 newblock = iblock
2712 - le32_to_cpu(newex.ee_block) 2709 - le32_to_cpu(newex.ee_block)
@@ -2854,7 +2851,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2854 if (!newblock) 2851 if (!newblock)
2855 goto out2; 2852 goto out2;
2856 ext_debug("allocate new block: goal %llu, found %llu/%lu\n", 2853 ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
2857 goal, newblock, allocated); 2854 ar.goal, newblock, allocated);
2858 2855
2859 /* try to insert new extent into found leaf and return */ 2856 /* try to insert new extent into found leaf and return */
2860 ext4_ext_store_pblock(&newex, newblock); 2857 ext4_ext_store_pblock(&newex, newblock);
@@ -2950,7 +2947,7 @@ void ext4_ext_truncate(struct inode *inode)
2950 * transaction synchronous. 2947 * transaction synchronous.
2951 */ 2948 */
2952 if (IS_SYNC(inode)) 2949 if (IS_SYNC(inode))
2953 handle->h_sync = 1; 2950 ext4_handle_sync(handle);
2954 2951
2955out_stop: 2952out_stop:
2956 up_write(&EXT4_I(inode)->i_data_sem); 2953 up_write(&EXT4_I(inode)->i_data_sem);
@@ -3004,7 +3001,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
3004 handle_t *handle; 3001 handle_t *handle;
3005 ext4_lblk_t block; 3002 ext4_lblk_t block;
3006 loff_t new_size; 3003 loff_t new_size;
3007 unsigned long max_blocks; 3004 unsigned int max_blocks;
3008 int ret = 0; 3005 int ret = 0;
3009 int ret2 = 0; 3006 int ret2 = 0;
3010 int retries = 0; 3007 int retries = 0;
@@ -3083,7 +3080,7 @@ retry:
3083/* 3080/*
3084 * Callback function called for each extent to gather FIEMAP information. 3081 * Callback function called for each extent to gather FIEMAP information.
3085 */ 3082 */
3086int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, 3083static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3087 struct ext4_ext_cache *newex, struct ext4_extent *ex, 3084 struct ext4_ext_cache *newex, struct ext4_extent *ex,
3088 void *data) 3085 void *data)
3089{ 3086{
@@ -3152,7 +3149,8 @@ int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3152/* fiemap flags we can handle specified here */ 3149/* fiemap flags we can handle specified here */
3153#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 3150#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
3154 3151
3155int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) 3152static int ext4_xattr_fiemap(struct inode *inode,
3153 struct fiemap_extent_info *fieinfo)
3156{ 3154{
3157 __u64 physical = 0; 3155 __u64 physical = 0;
3158 __u64 length; 3156 __u64 length;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6bd11fba71f7..f731cb545a03 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -140,9 +140,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
140 return 0; 140 return 0;
141} 141}
142 142
143extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
144 __u64 start, __u64 len);
145
146const struct file_operations ext4_file_operations = { 143const struct file_operations ext4_file_operations = {
147 .llseek = generic_file_llseek, 144 .llseek = generic_file_llseek,
148 .read = do_sync_read, 145 .read = do_sync_read,
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 556ca8eba3db..ac8f168c8ab4 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -35,23 +35,71 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
35 35
36 36
37/* The old legacy hash */ 37/* The old legacy hash */
38static __u32 dx_hack_hash(const char *name, int len) 38static __u32 dx_hack_hash_unsigned(const char *name, int len)
39{ 39{
40 __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; 40 __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
41 const unsigned char *ucp = (const unsigned char *) name;
42
43 while (len--) {
44 hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
45
46 if (hash & 0x80000000)
47 hash -= 0x7fffffff;
48 hash1 = hash0;
49 hash0 = hash;
50 }
51 return hash0 << 1;
52}
53
54static __u32 dx_hack_hash_signed(const char *name, int len)
55{
56 __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
57 const signed char *scp = (const signed char *) name;
58
41 while (len--) { 59 while (len--) {
42 __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); 60 hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));
43 61
44 if (hash & 0x80000000) hash -= 0x7fffffff; 62 if (hash & 0x80000000)
63 hash -= 0x7fffffff;
45 hash1 = hash0; 64 hash1 = hash0;
46 hash0 = hash; 65 hash0 = hash;
47 } 66 }
48 return (hash0 << 1); 67 return hash0 << 1;
68}
69
70static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
71{
72 __u32 pad, val;
73 int i;
74 const signed char *scp = (const signed char *) msg;
75
76 pad = (__u32)len | ((__u32)len << 8);
77 pad |= pad << 16;
78
79 val = pad;
80 if (len > num*4)
81 len = num * 4;
82 for (i = 0; i < len; i++) {
83 if ((i % 4) == 0)
84 val = pad;
85 val = ((int) scp[i]) + (val << 8);
86 if ((i % 4) == 3) {
87 *buf++ = val;
88 val = pad;
89 num--;
90 }
91 }
92 if (--num >= 0)
93 *buf++ = val;
94 while (--num >= 0)
95 *buf++ = pad;
49} 96}
50 97
51static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) 98static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
52{ 99{
53 __u32 pad, val; 100 __u32 pad, val;
54 int i; 101 int i;
102 const unsigned char *ucp = (const unsigned char *) msg;
55 103
56 pad = (__u32)len | ((__u32)len << 8); 104 pad = (__u32)len | ((__u32)len << 8);
57 pad |= pad << 16; 105 pad |= pad << 16;
@@ -62,7 +110,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
62 for (i = 0; i < len; i++) { 110 for (i = 0; i < len; i++) {
63 if ((i % 4) == 0) 111 if ((i % 4) == 0)
64 val = pad; 112 val = pad;
65 val = msg[i] + (val << 8); 113 val = ((int) ucp[i]) + (val << 8);
66 if ((i % 4) == 3) { 114 if ((i % 4) == 3) {
67 *buf++ = val; 115 *buf++ = val;
68 val = pad; 116 val = pad;
@@ -95,6 +143,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
95 const char *p; 143 const char *p;
96 int i; 144 int i;
97 __u32 in[8], buf[4]; 145 __u32 in[8], buf[4];
146 void (*str2hashbuf)(const char *, int, __u32 *, int) =
147 str2hashbuf_signed;
98 148
99 /* Initialize the default seed for the hash checksum functions */ 149 /* Initialize the default seed for the hash checksum functions */
100 buf[0] = 0x67452301; 150 buf[0] = 0x67452301;
@@ -113,13 +163,18 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
113 } 163 }
114 164
115 switch (hinfo->hash_version) { 165 switch (hinfo->hash_version) {
166 case DX_HASH_LEGACY_UNSIGNED:
167 hash = dx_hack_hash_unsigned(name, len);
168 break;
116 case DX_HASH_LEGACY: 169 case DX_HASH_LEGACY:
117 hash = dx_hack_hash(name, len); 170 hash = dx_hack_hash_signed(name, len);
118 break; 171 break;
172 case DX_HASH_HALF_MD4_UNSIGNED:
173 str2hashbuf = str2hashbuf_unsigned;
119 case DX_HASH_HALF_MD4: 174 case DX_HASH_HALF_MD4:
120 p = name; 175 p = name;
121 while (len > 0) { 176 while (len > 0) {
122 str2hashbuf(p, len, in, 8); 177 (*str2hashbuf)(p, len, in, 8);
123 half_md4_transform(buf, in); 178 half_md4_transform(buf, in);
124 len -= 32; 179 len -= 32;
125 p += 32; 180 p += 32;
@@ -127,10 +182,12 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
127 minor_hash = buf[2]; 182 minor_hash = buf[2];
128 hash = buf[1]; 183 hash = buf[1];
129 break; 184 break;
185 case DX_HASH_TEA_UNSIGNED:
186 str2hashbuf = str2hashbuf_unsigned;
130 case DX_HASH_TEA: 187 case DX_HASH_TEA:
131 p = name; 188 p = name;
132 while (len > 0) { 189 while (len > 0) {
133 str2hashbuf(p, len, in, 4); 190 (*str2hashbuf)(p, len, in, 4);
134 TEA_transform(buf, in); 191 TEA_transform(buf, in);
135 len -= 16; 192 len -= 16;
136 p += 16; 193 p += 16;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 2a117e286e54..4fb86a0061d0 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -74,17 +74,17 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
74 /* If checksum is bad mark all blocks and inodes use to prevent 74 /* If checksum is bad mark all blocks and inodes use to prevent
75 * allocation, essentially implementing a per-group read-only flag. */ 75 * allocation, essentially implementing a per-group read-only flag. */
76 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 76 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
77 ext4_error(sb, __func__, "Checksum bad for group %lu\n", 77 ext4_error(sb, __func__, "Checksum bad for group %u",
78 block_group); 78 block_group);
79 gdp->bg_free_blocks_count = 0; 79 ext4_free_blks_set(sb, gdp, 0);
80 gdp->bg_free_inodes_count = 0; 80 ext4_free_inodes_set(sb, gdp, 0);
81 gdp->bg_itable_unused = 0; 81 ext4_itable_unused_set(sb, gdp, 0);
82 memset(bh->b_data, 0xff, sb->s_blocksize); 82 memset(bh->b_data, 0xff, sb->s_blocksize);
83 return 0; 83 return 0;
84 } 84 }
85 85
86 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); 86 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
87 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), 87 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
88 bh->b_data); 88 bh->b_data);
89 89
90 return EXT4_INODES_PER_GROUP(sb); 90 return EXT4_INODES_PER_GROUP(sb);
@@ -111,29 +111,49 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
111 if (unlikely(!bh)) { 111 if (unlikely(!bh)) {
112 ext4_error(sb, __func__, 112 ext4_error(sb, __func__,
113 "Cannot read inode bitmap - " 113 "Cannot read inode bitmap - "
114 "block_group = %lu, inode_bitmap = %llu", 114 "block_group = %u, inode_bitmap = %llu",
115 block_group, bitmap_blk); 115 block_group, bitmap_blk);
116 return NULL; 116 return NULL;
117 } 117 }
118 if (buffer_uptodate(bh) && 118 if (bitmap_uptodate(bh))
119 !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
120 return bh; 119 return bh;
121 120
122 lock_buffer(bh); 121 lock_buffer(bh);
122 if (bitmap_uptodate(bh)) {
123 unlock_buffer(bh);
124 return bh;
125 }
123 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 126 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
124 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 127 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
125 ext4_init_inode_bitmap(sb, bh, block_group, desc); 128 ext4_init_inode_bitmap(sb, bh, block_group, desc);
129 set_bitmap_uptodate(bh);
126 set_buffer_uptodate(bh); 130 set_buffer_uptodate(bh);
127 unlock_buffer(bh);
128 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 131 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
132 unlock_buffer(bh);
129 return bh; 133 return bh;
130 } 134 }
131 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 135 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
136 if (buffer_uptodate(bh)) {
137 /*
138 * if not uninit if bh is uptodate,
139 * bitmap is also uptodate
140 */
141 set_bitmap_uptodate(bh);
142 unlock_buffer(bh);
143 return bh;
144 }
145 /*
146 * submit the buffer_head for read. We can
147 * safely mark the bitmap as uptodate now.
148 * We do it here so the bitmap uptodate bit
149 * get set with buffer lock held.
150 */
151 set_bitmap_uptodate(bh);
132 if (bh_submit_read(bh) < 0) { 152 if (bh_submit_read(bh) < 0) {
133 put_bh(bh); 153 put_bh(bh);
134 ext4_error(sb, __func__, 154 ext4_error(sb, __func__,
135 "Cannot read inode bitmap - " 155 "Cannot read inode bitmap - "
136 "block_group = %lu, inode_bitmap = %llu", 156 "block_group = %u, inode_bitmap = %llu",
137 block_group, bitmap_blk); 157 block_group, bitmap_blk);
138 return NULL; 158 return NULL;
139 } 159 }
@@ -168,7 +188,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
168 struct ext4_group_desc *gdp; 188 struct ext4_group_desc *gdp;
169 struct ext4_super_block *es; 189 struct ext4_super_block *es;
170 struct ext4_sb_info *sbi; 190 struct ext4_sb_info *sbi;
171 int fatal = 0, err; 191 int fatal = 0, err, count;
172 ext4_group_t flex_group; 192 ext4_group_t flex_group;
173 193
174 if (atomic_read(&inode->i_count) > 1) { 194 if (atomic_read(&inode->i_count) > 1) {
@@ -190,6 +210,11 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
190 210
191 ino = inode->i_ino; 211 ino = inode->i_ino;
192 ext4_debug("freeing inode %lu\n", ino); 212 ext4_debug("freeing inode %lu\n", ino);
213 trace_mark(ext4_free_inode,
214 "dev %s ino %lu mode %d uid %lu gid %lu bocks %llu",
215 sb->s_id, inode->i_ino, inode->i_mode,
216 (unsigned long) inode->i_uid, (unsigned long) inode->i_gid,
217 (unsigned long long) inode->i_blocks);
193 218
194 /* 219 /*
195 * Note: we must free any quota before locking the superblock, 220 * Note: we must free any quota before locking the superblock,
@@ -236,9 +261,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
236 261
237 if (gdp) { 262 if (gdp) {
238 spin_lock(sb_bgl_lock(sbi, block_group)); 263 spin_lock(sb_bgl_lock(sbi, block_group));
239 le16_add_cpu(&gdp->bg_free_inodes_count, 1); 264 count = ext4_free_inodes_count(sb, gdp) + 1;
240 if (is_directory) 265 ext4_free_inodes_set(sb, gdp, count);
241 le16_add_cpu(&gdp->bg_used_dirs_count, -1); 266 if (is_directory) {
267 count = ext4_used_dirs_count(sb, gdp) - 1;
268 ext4_used_dirs_set(sb, gdp, count);
269 }
242 gdp->bg_checksum = ext4_group_desc_csum(sbi, 270 gdp->bg_checksum = ext4_group_desc_csum(sbi,
243 block_group, gdp); 271 block_group, gdp);
244 spin_unlock(sb_bgl_lock(sbi, block_group)); 272 spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -253,12 +281,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
253 spin_unlock(sb_bgl_lock(sbi, flex_group)); 281 spin_unlock(sb_bgl_lock(sbi, flex_group));
254 } 282 }
255 } 283 }
256 BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); 284 BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
257 err = ext4_journal_dirty_metadata(handle, bh2); 285 err = ext4_handle_dirty_metadata(handle, NULL, bh2);
258 if (!fatal) fatal = err; 286 if (!fatal) fatal = err;
259 } 287 }
260 BUFFER_TRACE(bitmap_bh, "call ext4_journal_dirty_metadata"); 288 BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
261 err = ext4_journal_dirty_metadata(handle, bitmap_bh); 289 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
262 if (!fatal) 290 if (!fatal)
263 fatal = err; 291 fatal = err;
264 sb->s_dirt = 1; 292 sb->s_dirt = 1;
@@ -291,13 +319,13 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
291 319
292 for (group = 0; group < ngroups; group++) { 320 for (group = 0; group < ngroups; group++) {
293 desc = ext4_get_group_desc(sb, group, NULL); 321 desc = ext4_get_group_desc(sb, group, NULL);
294 if (!desc || !desc->bg_free_inodes_count) 322 if (!desc || !ext4_free_inodes_count(sb, desc))
295 continue; 323 continue;
296 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) 324 if (ext4_free_inodes_count(sb, desc) < avefreei)
297 continue; 325 continue;
298 if (!best_desc || 326 if (!best_desc ||
299 (le16_to_cpu(desc->bg_free_blocks_count) > 327 (ext4_free_blks_count(sb, desc) >
300 le16_to_cpu(best_desc->bg_free_blocks_count))) { 328 ext4_free_blks_count(sb, best_desc))) {
301 *best_group = group; 329 *best_group = group;
302 best_desc = desc; 330 best_desc = desc;
303 ret = 0; 331 ret = 0;
@@ -369,7 +397,7 @@ found_flexbg:
369 for (i = best_flex * flex_size; i < ngroups && 397 for (i = best_flex * flex_size; i < ngroups &&
370 i < (best_flex + 1) * flex_size; i++) { 398 i < (best_flex + 1) * flex_size; i++) {
371 desc = ext4_get_group_desc(sb, i, &bh); 399 desc = ext4_get_group_desc(sb, i, &bh);
372 if (le16_to_cpu(desc->bg_free_inodes_count)) { 400 if (ext4_free_inodes_count(sb, desc)) {
373 *best_group = i; 401 *best_group = i;
374 goto out; 402 goto out;
375 } 403 }
@@ -443,17 +471,17 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
443 for (i = 0; i < ngroups; i++) { 471 for (i = 0; i < ngroups; i++) {
444 grp = (parent_group + i) % ngroups; 472 grp = (parent_group + i) % ngroups;
445 desc = ext4_get_group_desc(sb, grp, NULL); 473 desc = ext4_get_group_desc(sb, grp, NULL);
446 if (!desc || !desc->bg_free_inodes_count) 474 if (!desc || !ext4_free_inodes_count(sb, desc))
447 continue; 475 continue;
448 if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) 476 if (ext4_used_dirs_count(sb, desc) >= best_ndir)
449 continue; 477 continue;
450 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) 478 if (ext4_free_inodes_count(sb, desc) < avefreei)
451 continue; 479 continue;
452 if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) 480 if (ext4_free_blks_count(sb, desc) < avefreeb)
453 continue; 481 continue;
454 *group = grp; 482 *group = grp;
455 ret = 0; 483 ret = 0;
456 best_ndir = le16_to_cpu(desc->bg_used_dirs_count); 484 best_ndir = ext4_used_dirs_count(sb, desc);
457 } 485 }
458 if (ret == 0) 486 if (ret == 0)
459 return ret; 487 return ret;
@@ -479,13 +507,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
479 for (i = 0; i < ngroups; i++) { 507 for (i = 0; i < ngroups; i++) {
480 *group = (parent_group + i) % ngroups; 508 *group = (parent_group + i) % ngroups;
481 desc = ext4_get_group_desc(sb, *group, NULL); 509 desc = ext4_get_group_desc(sb, *group, NULL);
482 if (!desc || !desc->bg_free_inodes_count) 510 if (!desc || !ext4_free_inodes_count(sb, desc))
483 continue; 511 continue;
484 if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) 512 if (ext4_used_dirs_count(sb, desc) >= max_dirs)
485 continue; 513 continue;
486 if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) 514 if (ext4_free_inodes_count(sb, desc) < min_inodes)
487 continue; 515 continue;
488 if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) 516 if (ext4_free_blks_count(sb, desc) < min_blocks)
489 continue; 517 continue;
490 return 0; 518 return 0;
491 } 519 }
@@ -494,8 +522,8 @@ fallback:
494 for (i = 0; i < ngroups; i++) { 522 for (i = 0; i < ngroups; i++) {
495 *group = (parent_group + i) % ngroups; 523 *group = (parent_group + i) % ngroups;
496 desc = ext4_get_group_desc(sb, *group, NULL); 524 desc = ext4_get_group_desc(sb, *group, NULL);
497 if (desc && desc->bg_free_inodes_count && 525 if (desc && ext4_free_inodes_count(sb, desc) &&
498 le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) 526 ext4_free_inodes_count(sb, desc) >= avefreei)
499 return 0; 527 return 0;
500 } 528 }
501 529
@@ -524,8 +552,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
524 */ 552 */
525 *group = parent_group; 553 *group = parent_group;
526 desc = ext4_get_group_desc(sb, *group, NULL); 554 desc = ext4_get_group_desc(sb, *group, NULL);
527 if (desc && le16_to_cpu(desc->bg_free_inodes_count) && 555 if (desc && ext4_free_inodes_count(sb, desc) &&
528 le16_to_cpu(desc->bg_free_blocks_count)) 556 ext4_free_blks_count(sb, desc))
529 return 0; 557 return 0;
530 558
531 /* 559 /*
@@ -548,8 +576,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
548 if (*group >= ngroups) 576 if (*group >= ngroups)
549 *group -= ngroups; 577 *group -= ngroups;
550 desc = ext4_get_group_desc(sb, *group, NULL); 578 desc = ext4_get_group_desc(sb, *group, NULL);
551 if (desc && le16_to_cpu(desc->bg_free_inodes_count) && 579 if (desc && ext4_free_inodes_count(sb, desc) &&
552 le16_to_cpu(desc->bg_free_blocks_count)) 580 ext4_free_blks_count(sb, desc))
553 return 0; 581 return 0;
554 } 582 }
555 583
@@ -562,7 +590,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
562 if (++*group >= ngroups) 590 if (++*group >= ngroups)
563 *group = 0; 591 *group = 0;
564 desc = ext4_get_group_desc(sb, *group, NULL); 592 desc = ext4_get_group_desc(sb, *group, NULL);
565 if (desc && le16_to_cpu(desc->bg_free_inodes_count)) 593 if (desc && ext4_free_inodes_count(sb, desc))
566 return 0; 594 return 0;
567 } 595 }
568 596
@@ -570,6 +598,79 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
570} 598}
571 599
572/* 600/*
601 * claim the inode from the inode bitmap. If the group
602 * is uninit we need to take the groups's sb_bgl_lock
603 * and clear the uninit flag. The inode bitmap update
604 * and group desc uninit flag clear should be done
605 * after holding sb_bgl_lock so that ext4_read_inode_bitmap
606 * doesn't race with the ext4_claim_inode
607 */
608static int ext4_claim_inode(struct super_block *sb,
609 struct buffer_head *inode_bitmap_bh,
610 unsigned long ino, ext4_group_t group, int mode)
611{
612 int free = 0, retval = 0, count;
613 struct ext4_sb_info *sbi = EXT4_SB(sb);
614 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
615
616 spin_lock(sb_bgl_lock(sbi, group));
617 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
618 /* not a free inode */
619 retval = 1;
620 goto err_ret;
621 }
622 ino++;
623 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
624 ino > EXT4_INODES_PER_GROUP(sb)) {
625 spin_unlock(sb_bgl_lock(sbi, group));
626 ext4_error(sb, __func__,
627 "reserved inode or inode > inodes count - "
628 "block_group = %u, inode=%lu", group,
629 ino + group * EXT4_INODES_PER_GROUP(sb));
630 return 1;
631 }
632 /* If we didn't allocate from within the initialized part of the inode
633 * table then we need to initialize up to this inode. */
634 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
635
636 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
637 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
638 /* When marking the block group with
639 * ~EXT4_BG_INODE_UNINIT we don't want to depend
640 * on the value of bg_itable_unused even though
641 * mke2fs could have initialized the same for us.
642 * Instead we calculated the value below
643 */
644
645 free = 0;
646 } else {
647 free = EXT4_INODES_PER_GROUP(sb) -
648 ext4_itable_unused_count(sb, gdp);
649 }
650
651 /*
652 * Check the relative inode number against the last used
653 * relative inode number in this group. if it is greater
654 * we need to update the bg_itable_unused count
655 *
656 */
657 if (ino > free)
658 ext4_itable_unused_set(sb, gdp,
659 (EXT4_INODES_PER_GROUP(sb) - ino));
660 }
661 count = ext4_free_inodes_count(sb, gdp) - 1;
662 ext4_free_inodes_set(sb, gdp, count);
663 if (S_ISDIR(mode)) {
664 count = ext4_used_dirs_count(sb, gdp) + 1;
665 ext4_used_dirs_set(sb, gdp, count);
666 }
667 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
668err_ret:
669 spin_unlock(sb_bgl_lock(sbi, group));
670 return retval;
671}
672
673/*
573 * There are two policies for allocating an inode. If the new inode is 674 * There are two policies for allocating an inode. If the new inode is
574 * a directory, then a forward search is made for a block group with both 675 * a directory, then a forward search is made for a block group with both
575 * free space and a low directory-to-inode ratio; if that fails, then of 676 * free space and a low directory-to-inode ratio; if that fails, then of
@@ -582,8 +683,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
582struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) 683struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
583{ 684{
584 struct super_block *sb; 685 struct super_block *sb;
585 struct buffer_head *bitmap_bh = NULL; 686 struct buffer_head *inode_bitmap_bh = NULL;
586 struct buffer_head *bh2; 687 struct buffer_head *group_desc_bh;
587 ext4_group_t group = 0; 688 ext4_group_t group = 0;
588 unsigned long ino = 0; 689 unsigned long ino = 0;
589 struct inode *inode; 690 struct inode *inode;
@@ -602,6 +703,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
602 return ERR_PTR(-EPERM); 703 return ERR_PTR(-EPERM);
603 704
604 sb = dir->i_sb; 705 sb = dir->i_sb;
706 trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
707 dir->i_ino, mode);
605 inode = new_inode(sb); 708 inode = new_inode(sb);
606 if (!inode) 709 if (!inode)
607 return ERR_PTR(-ENOMEM); 710 return ERR_PTR(-ENOMEM);
@@ -631,40 +734,52 @@ got_group:
631 for (i = 0; i < sbi->s_groups_count; i++) { 734 for (i = 0; i < sbi->s_groups_count; i++) {
632 err = -EIO; 735 err = -EIO;
633 736
634 gdp = ext4_get_group_desc(sb, group, &bh2); 737 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
635 if (!gdp) 738 if (!gdp)
636 goto fail; 739 goto fail;
637 740
638 brelse(bitmap_bh); 741 brelse(inode_bitmap_bh);
639 bitmap_bh = ext4_read_inode_bitmap(sb, group); 742 inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
640 if (!bitmap_bh) 743 if (!inode_bitmap_bh)
641 goto fail; 744 goto fail;
642 745
643 ino = 0; 746 ino = 0;
644 747
645repeat_in_this_group: 748repeat_in_this_group:
646 ino = ext4_find_next_zero_bit((unsigned long *) 749 ino = ext4_find_next_zero_bit((unsigned long *)
647 bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); 750 inode_bitmap_bh->b_data,
751 EXT4_INODES_PER_GROUP(sb), ino);
752
648 if (ino < EXT4_INODES_PER_GROUP(sb)) { 753 if (ino < EXT4_INODES_PER_GROUP(sb)) {
649 754
650 BUFFER_TRACE(bitmap_bh, "get_write_access"); 755 BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
651 err = ext4_journal_get_write_access(handle, bitmap_bh); 756 err = ext4_journal_get_write_access(handle,
757 inode_bitmap_bh);
652 if (err) 758 if (err)
653 goto fail; 759 goto fail;
654 760
655 if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group), 761 BUFFER_TRACE(group_desc_bh, "get_write_access");
656 ino, bitmap_bh->b_data)) { 762 err = ext4_journal_get_write_access(handle,
763 group_desc_bh);
764 if (err)
765 goto fail;
766 if (!ext4_claim_inode(sb, inode_bitmap_bh,
767 ino, group, mode)) {
657 /* we won it */ 768 /* we won it */
658 BUFFER_TRACE(bitmap_bh, 769 BUFFER_TRACE(inode_bitmap_bh,
659 "call ext4_journal_dirty_metadata"); 770 "call ext4_handle_dirty_metadata");
660 err = ext4_journal_dirty_metadata(handle, 771 err = ext4_handle_dirty_metadata(handle,
661 bitmap_bh); 772 inode,
773 inode_bitmap_bh);
662 if (err) 774 if (err)
663 goto fail; 775 goto fail;
776 /* zero bit is inode number 1*/
777 ino++;
664 goto got; 778 goto got;
665 } 779 }
666 /* we lost it */ 780 /* we lost it */
667 jbd2_journal_release_buffer(handle, bitmap_bh); 781 ext4_handle_release_buffer(handle, inode_bitmap_bh);
782 ext4_handle_release_buffer(handle, group_desc_bh);
668 783
669 if (++ino < EXT4_INODES_PER_GROUP(sb)) 784 if (++ino < EXT4_INODES_PER_GROUP(sb))
670 goto repeat_in_this_group; 785 goto repeat_in_this_group;
@@ -684,30 +799,16 @@ repeat_in_this_group:
684 goto out; 799 goto out;
685 800
686got: 801got:
687 ino++;
688 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
689 ino > EXT4_INODES_PER_GROUP(sb)) {
690 ext4_error(sb, __func__,
691 "reserved inode or inode > inodes count - "
692 "block_group = %lu, inode=%lu", group,
693 ino + group * EXT4_INODES_PER_GROUP(sb));
694 err = -EIO;
695 goto fail;
696 }
697
698 BUFFER_TRACE(bh2, "get_write_access");
699 err = ext4_journal_get_write_access(handle, bh2);
700 if (err) goto fail;
701
702 /* We may have to initialize the block bitmap if it isn't already */ 802 /* We may have to initialize the block bitmap if it isn't already */
703 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && 803 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
704 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 804 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
705 struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group); 805 struct buffer_head *block_bitmap_bh;
706 806
707 BUFFER_TRACE(block_bh, "get block bitmap access"); 807 block_bitmap_bh = ext4_read_block_bitmap(sb, group);
708 err = ext4_journal_get_write_access(handle, block_bh); 808 BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
809 err = ext4_journal_get_write_access(handle, block_bitmap_bh);
709 if (err) { 810 if (err) {
710 brelse(block_bh); 811 brelse(block_bitmap_bh);
711 goto fail; 812 goto fail;
712 } 813 }
713 814
@@ -715,9 +816,9 @@ got:
715 spin_lock(sb_bgl_lock(sbi, group)); 816 spin_lock(sb_bgl_lock(sbi, group));
716 /* recheck and clear flag under lock if we still need to */ 817 /* recheck and clear flag under lock if we still need to */
717 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 818 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
718 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
719 free = ext4_free_blocks_after_init(sb, group, gdp); 819 free = ext4_free_blocks_after_init(sb, group, gdp);
720 gdp->bg_free_blocks_count = cpu_to_le16(free); 820 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
821 ext4_free_blks_set(sb, gdp, free);
721 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, 822 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
722 gdp); 823 gdp);
723 } 824 }
@@ -725,55 +826,19 @@ got:
725 826
726 /* Don't need to dirty bitmap block if we didn't change it */ 827 /* Don't need to dirty bitmap block if we didn't change it */
727 if (free) { 828 if (free) {
728 BUFFER_TRACE(block_bh, "dirty block bitmap"); 829 BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
729 err = ext4_journal_dirty_metadata(handle, block_bh); 830 err = ext4_handle_dirty_metadata(handle,
831 NULL, block_bitmap_bh);
730 } 832 }
731 833
732 brelse(block_bh); 834 brelse(block_bitmap_bh);
733 if (err) 835 if (err)
734 goto fail; 836 goto fail;
735 } 837 }
736 838 BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
737 spin_lock(sb_bgl_lock(sbi, group)); 839 err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
738 /* If we didn't allocate from within the initialized part of the inode 840 if (err)
739 * table then we need to initialize up to this inode. */ 841 goto fail;
740 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
741 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
742 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
743
744 /* When marking the block group with
745 * ~EXT4_BG_INODE_UNINIT we don't want to depend
746 * on the value of bg_itable_unused even though
747 * mke2fs could have initialized the same for us.
748 * Instead we calculated the value below
749 */
750
751 free = 0;
752 } else {
753 free = EXT4_INODES_PER_GROUP(sb) -
754 le16_to_cpu(gdp->bg_itable_unused);
755 }
756
757 /*
758 * Check the relative inode number against the last used
759 * relative inode number in this group. if it is greater
760 * we need to update the bg_itable_unused count
761 *
762 */
763 if (ino > free)
764 gdp->bg_itable_unused =
765 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
766 }
767
768 le16_add_cpu(&gdp->bg_free_inodes_count, -1);
769 if (S_ISDIR(mode)) {
770 le16_add_cpu(&gdp->bg_used_dirs_count, 1);
771 }
772 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
773 spin_unlock(sb_bgl_lock(sbi, group));
774 BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
775 err = ext4_journal_dirty_metadata(handle, bh2);
776 if (err) goto fail;
777 842
778 percpu_counter_dec(&sbi->s_freeinodes_counter); 843 percpu_counter_dec(&sbi->s_freeinodes_counter);
779 if (S_ISDIR(mode)) 844 if (S_ISDIR(mode))
@@ -787,7 +852,7 @@ got:
787 spin_unlock(sb_bgl_lock(sbi, flex_group)); 852 spin_unlock(sb_bgl_lock(sbi, flex_group));
788 } 853 }
789 854
790 inode->i_uid = current->fsuid; 855 inode->i_uid = current_fsuid();
791 if (test_opt(sb, GRPID)) 856 if (test_opt(sb, GRPID))
792 inode->i_gid = dir->i_gid; 857 inode->i_gid = dir->i_gid;
793 else if (dir->i_mode & S_ISGID) { 858 else if (dir->i_mode & S_ISGID) {
@@ -795,7 +860,7 @@ got:
795 if (S_ISDIR(mode)) 860 if (S_ISDIR(mode))
796 mode |= S_ISGID; 861 mode |= S_ISGID;
797 } else 862 } else
798 inode->i_gid = current->fsgid; 863 inode->i_gid = current_fsgid();
799 inode->i_mode = mode; 864 inode->i_mode = mode;
800 865
801 inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); 866 inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
@@ -825,8 +890,11 @@ got:
825 890
826 ext4_set_inode_flags(inode); 891 ext4_set_inode_flags(inode);
827 if (IS_DIRSYNC(inode)) 892 if (IS_DIRSYNC(inode))
828 handle->h_sync = 1; 893 ext4_handle_sync(handle);
829 insert_inode_hash(inode); 894 if (insert_inode_locked(inode) < 0) {
895 err = -EINVAL;
896 goto fail_drop;
897 }
830 spin_lock(&sbi->s_next_gen_lock); 898 spin_lock(&sbi->s_next_gen_lock);
831 inode->i_generation = sbi->s_next_generation++; 899 inode->i_generation = sbi->s_next_generation++;
832 spin_unlock(&sbi->s_next_gen_lock); 900 spin_unlock(&sbi->s_next_gen_lock);
@@ -849,7 +917,7 @@ got:
849 if (err) 917 if (err)
850 goto fail_free_drop; 918 goto fail_free_drop;
851 919
852 if (test_opt(sb, EXTENTS)) { 920 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
853 /* set extent flag only for directory, file and normal symlink*/ 921 /* set extent flag only for directory, file and normal symlink*/
854 if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { 922 if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
855 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; 923 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
@@ -864,6 +932,8 @@ got:
864 } 932 }
865 933
866 ext4_debug("allocating inode %lu\n", inode->i_ino); 934 ext4_debug("allocating inode %lu\n", inode->i_ino);
935 trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d",
936 sb->s_id, inode->i_ino, dir->i_ino, mode);
867 goto really_out; 937 goto really_out;
868fail: 938fail:
869 ext4_std_error(sb, err); 939 ext4_std_error(sb, err);
@@ -871,7 +941,7 @@ out:
871 iput(inode); 941 iput(inode);
872 ret = ERR_PTR(err); 942 ret = ERR_PTR(err);
873really_out: 943really_out:
874 brelse(bitmap_bh); 944 brelse(inode_bitmap_bh);
875 return ret; 945 return ret;
876 946
877fail_free_drop: 947fail_free_drop:
@@ -881,8 +951,9 @@ fail_drop:
881 DQUOT_DROP(inode); 951 DQUOT_DROP(inode);
882 inode->i_flags |= S_NOQUOTA; 952 inode->i_flags |= S_NOQUOTA;
883 inode->i_nlink = 0; 953 inode->i_nlink = 0;
954 unlock_new_inode(inode);
884 iput(inode); 955 iput(inode);
885 brelse(bitmap_bh); 956 brelse(inode_bitmap_bh);
886 return ERR_PTR(err); 957 return ERR_PTR(err);
887} 958}
888 959
@@ -981,7 +1052,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
981 gdp = ext4_get_group_desc(sb, i, NULL); 1052 gdp = ext4_get_group_desc(sb, i, NULL);
982 if (!gdp) 1053 if (!gdp)
983 continue; 1054 continue;
984 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 1055 desc_count += ext4_free_inodes_count(sb, gdp);
985 brelse(bitmap_bh); 1056 brelse(bitmap_bh);
986 bitmap_bh = ext4_read_inode_bitmap(sb, i); 1057 bitmap_bh = ext4_read_inode_bitmap(sb, i);
987 if (!bitmap_bh) 1058 if (!bitmap_bh)
@@ -989,7 +1060,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
989 1060
990 x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); 1061 x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
991 printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", 1062 printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
992 i, le16_to_cpu(gdp->bg_free_inodes_count), x); 1063 i, ext4_free_inodes_count(sb, gdp), x);
993 bitmap_count += x; 1064 bitmap_count += x;
994 } 1065 }
995 brelse(bitmap_bh); 1066 brelse(bitmap_bh);
@@ -1003,7 +1074,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1003 gdp = ext4_get_group_desc(sb, i, NULL); 1074 gdp = ext4_get_group_desc(sb, i, NULL);
1004 if (!gdp) 1075 if (!gdp)
1005 continue; 1076 continue;
1006 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 1077 desc_count += ext4_free_inodes_count(sb, gdp);
1007 cond_resched(); 1078 cond_resched();
1008 } 1079 }
1009 return desc_count; 1080 return desc_count;
@@ -1020,8 +1091,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
1020 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1091 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1021 if (!gdp) 1092 if (!gdp)
1022 continue; 1093 continue;
1023 count += le16_to_cpu(gdp->bg_used_dirs_count); 1094 count += ext4_used_dirs_count(sb, gdp);
1024 } 1095 }
1025 return count; 1096 return count;
1026} 1097}
1027
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be21a5ae33cb..a6444cee0c7e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -34,6 +34,7 @@
34#include <linux/writeback.h> 34#include <linux/writeback.h>
35#include <linux/pagevec.h> 35#include <linux/pagevec.h>
36#include <linux/mpage.h> 36#include <linux/mpage.h>
37#include <linux/namei.h>
37#include <linux/uio.h> 38#include <linux/uio.h>
38#include <linux/bio.h> 39#include <linux/bio.h>
39#include "ext4_jbd2.h" 40#include "ext4_jbd2.h"
@@ -71,12 +72,17 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
71 * "bh" may be NULL: a metadata block may have been freed from memory 72 * "bh" may be NULL: a metadata block may have been freed from memory
72 * but there may still be a record of it in the journal, and that record 73 * but there may still be a record of it in the journal, and that record
73 * still needs to be revoked. 74 * still needs to be revoked.
75 *
76 * If the handle isn't valid we're not journaling so there's nothing to do.
74 */ 77 */
75int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, 78int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
76 struct buffer_head *bh, ext4_fsblk_t blocknr) 79 struct buffer_head *bh, ext4_fsblk_t blocknr)
77{ 80{
78 int err; 81 int err;
79 82
83 if (!ext4_handle_valid(handle))
84 return 0;
85
80 might_sleep(); 86 might_sleep();
81 87
82 BUFFER_TRACE(bh, "enter"); 88 BUFFER_TRACE(bh, "enter");
@@ -169,7 +175,9 @@ static handle_t *start_transaction(struct inode *inode)
169 */ 175 */
170static int try_to_extend_transaction(handle_t *handle, struct inode *inode) 176static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
171{ 177{
172 if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) 178 if (!ext4_handle_valid(handle))
179 return 0;
180 if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
173 return 0; 181 return 0;
174 if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) 182 if (!ext4_journal_extend(handle, blocks_for_truncate(inode)))
175 return 0; 183 return 0;
@@ -183,6 +191,7 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
183 */ 191 */
184static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) 192static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
185{ 193{
194 BUG_ON(EXT4_JOURNAL(inode) == NULL);
186 jbd_debug(2, "restarting handle %p\n", handle); 195 jbd_debug(2, "restarting handle %p\n", handle);
187 return ext4_journal_restart(handle, blocks_for_truncate(inode)); 196 return ext4_journal_restart(handle, blocks_for_truncate(inode));
188} 197}
@@ -215,7 +224,7 @@ void ext4_delete_inode(struct inode *inode)
215 } 224 }
216 225
217 if (IS_SYNC(inode)) 226 if (IS_SYNC(inode))
218 handle->h_sync = 1; 227 ext4_handle_sync(handle);
219 inode->i_size = 0; 228 inode->i_size = 0;
220 err = ext4_mark_inode_dirty(handle, inode); 229 err = ext4_mark_inode_dirty(handle, inode);
221 if (err) { 230 if (err) {
@@ -232,7 +241,7 @@ void ext4_delete_inode(struct inode *inode)
232 * enough credits left in the handle to remove the inode from 241 * enough credits left in the handle to remove the inode from
233 * the orphan list and set the dtime field. 242 * the orphan list and set the dtime field.
234 */ 243 */
235 if (handle->h_buffer_credits < 3) { 244 if (!ext4_handle_has_enough_credits(handle, 3)) {
236 err = ext4_journal_extend(handle, 3); 245 err = ext4_journal_extend(handle, 3);
237 if (err > 0) 246 if (err > 0)
238 err = ext4_journal_restart(handle, 3); 247 err = ext4_journal_restart(handle, 3);
@@ -505,10 +514,10 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
505 * return the total number of blocks to be allocate, including the 514 * return the total number of blocks to be allocate, including the
506 * direct and indirect blocks. 515 * direct and indirect blocks.
507 */ 516 */
508static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks, 517static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
509 int blocks_to_boundary) 518 int blocks_to_boundary)
510{ 519{
511 unsigned long count = 0; 520 unsigned int count = 0;
512 521
513 /* 522 /*
514 * Simple case, [t,d]Indirect block(s) has not allocated yet 523 * Simple case, [t,d]Indirect block(s) has not allocated yet
@@ -546,6 +555,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
546 int indirect_blks, int blks, 555 int indirect_blks, int blks,
547 ext4_fsblk_t new_blocks[4], int *err) 556 ext4_fsblk_t new_blocks[4], int *err)
548{ 557{
558 struct ext4_allocation_request ar;
549 int target, i; 559 int target, i;
550 unsigned long count = 0, blk_allocated = 0; 560 unsigned long count = 0, blk_allocated = 0;
551 int index = 0; 561 int index = 0;
@@ -594,10 +604,17 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
594 if (!target) 604 if (!target)
595 goto allocated; 605 goto allocated;
596 /* Now allocate data blocks */ 606 /* Now allocate data blocks */
597 count = target; 607 memset(&ar, 0, sizeof(ar));
598 /* allocating blocks for data blocks */ 608 ar.inode = inode;
599 current_block = ext4_new_blocks(handle, inode, iblock, 609 ar.goal = goal;
600 goal, &count, err); 610 ar.len = target;
611 ar.logical = iblock;
612 if (S_ISREG(inode->i_mode))
613 /* enable in-core preallocation only for regular files */
614 ar.flags = EXT4_MB_HINT_DATA;
615
616 current_block = ext4_mb_new_blocks(handle, &ar, err);
617
601 if (*err && (target == blks)) { 618 if (*err && (target == blks)) {
602 /* 619 /*
603 * if the allocation failed and we didn't allocate 620 * if the allocation failed and we didn't allocate
@@ -613,7 +630,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
613 */ 630 */
614 new_blocks[index] = current_block; 631 new_blocks[index] = current_block;
615 } 632 }
616 blk_allocated += count; 633 blk_allocated += ar.len;
617 } 634 }
618allocated: 635allocated:
619 /* total number of blocks allocated for direct blocks */ 636 /* total number of blocks allocated for direct blocks */
@@ -708,8 +725,8 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
708 set_buffer_uptodate(bh); 725 set_buffer_uptodate(bh);
709 unlock_buffer(bh); 726 unlock_buffer(bh);
710 727
711 BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); 728 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
712 err = ext4_journal_dirty_metadata(handle, bh); 729 err = ext4_handle_dirty_metadata(handle, inode, bh);
713 if (err) 730 if (err)
714 goto failed; 731 goto failed;
715 } 732 }
@@ -791,8 +808,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
791 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. 808 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
792 */ 809 */
793 jbd_debug(5, "splicing indirect only\n"); 810 jbd_debug(5, "splicing indirect only\n");
794 BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata"); 811 BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
795 err = ext4_journal_dirty_metadata(handle, where->bh); 812 err = ext4_handle_dirty_metadata(handle, inode, where->bh);
796 if (err) 813 if (err)
797 goto err_out; 814 goto err_out;
798 } else { 815 } else {
@@ -839,10 +856,10 @@ err_out:
839 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block 856 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
840 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) 857 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
841 */ 858 */
842int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, 859static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
843 ext4_lblk_t iblock, unsigned long maxblocks, 860 ext4_lblk_t iblock, unsigned int maxblocks,
844 struct buffer_head *bh_result, 861 struct buffer_head *bh_result,
845 int create, int extend_disksize) 862 int create, int extend_disksize)
846{ 863{
847 int err = -EIO; 864 int err = -EIO;
848 ext4_lblk_t offsets[4]; 865 ext4_lblk_t offsets[4];
@@ -1044,7 +1061,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1044 * It returns the error in case of allocation failure. 1061 * It returns the error in case of allocation failure.
1045 */ 1062 */
1046int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, 1063int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1047 unsigned long max_blocks, struct buffer_head *bh, 1064 unsigned int max_blocks, struct buffer_head *bh,
1048 int create, int extend_disksize, int flag) 1065 int create, int extend_disksize, int flag)
1049{ 1066{
1050 int retval; 1067 int retval;
@@ -1220,8 +1237,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
1220 set_buffer_uptodate(bh); 1237 set_buffer_uptodate(bh);
1221 } 1238 }
1222 unlock_buffer(bh); 1239 unlock_buffer(bh);
1223 BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); 1240 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1224 err = ext4_journal_dirty_metadata(handle, bh); 1241 err = ext4_handle_dirty_metadata(handle, inode, bh);
1225 if (!fatal) 1242 if (!fatal)
1226 fatal = err; 1243 fatal = err;
1227 } else { 1244 } else {
@@ -1334,6 +1351,10 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1334 pgoff_t index; 1351 pgoff_t index;
1335 unsigned from, to; 1352 unsigned from, to;
1336 1353
1354 trace_mark(ext4_write_begin,
1355 "dev %s ino %lu pos %llu len %u flags %u",
1356 inode->i_sb->s_id, inode->i_ino,
1357 (unsigned long long) pos, len, flags);
1337 index = pos >> PAGE_CACHE_SHIFT; 1358 index = pos >> PAGE_CACHE_SHIFT;
1338 from = pos & (PAGE_CACHE_SIZE - 1); 1359 from = pos & (PAGE_CACHE_SIZE - 1);
1339 to = from + len; 1360 to = from + len;
@@ -1345,7 +1366,7 @@ retry:
1345 goto out; 1366 goto out;
1346 } 1367 }
1347 1368
1348 page = __grab_cache_page(mapping, index); 1369 page = grab_cache_page_write_begin(mapping, index, flags);
1349 if (!page) { 1370 if (!page) {
1350 ext4_journal_stop(handle); 1371 ext4_journal_stop(handle);
1351 ret = -ENOMEM; 1372 ret = -ENOMEM;
@@ -1386,7 +1407,7 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1386 if (!buffer_mapped(bh) || buffer_freed(bh)) 1407 if (!buffer_mapped(bh) || buffer_freed(bh))
1387 return 0; 1408 return 0;
1388 set_buffer_uptodate(bh); 1409 set_buffer_uptodate(bh);
1389 return ext4_journal_dirty_metadata(handle, bh); 1410 return ext4_handle_dirty_metadata(handle, NULL, bh);
1390} 1411}
1391 1412
1392/* 1413/*
@@ -1405,6 +1426,10 @@ static int ext4_ordered_write_end(struct file *file,
1405 struct inode *inode = mapping->host; 1426 struct inode *inode = mapping->host;
1406 int ret = 0, ret2; 1427 int ret = 0, ret2;
1407 1428
1429 trace_mark(ext4_ordered_write_end,
1430 "dev %s ino %lu pos %llu len %u copied %u",
1431 inode->i_sb->s_id, inode->i_ino,
1432 (unsigned long long) pos, len, copied);
1408 ret = ext4_jbd2_file_inode(handle, inode); 1433 ret = ext4_jbd2_file_inode(handle, inode);
1409 1434
1410 if (ret == 0) { 1435 if (ret == 0) {
@@ -1443,6 +1468,10 @@ static int ext4_writeback_write_end(struct file *file,
1443 int ret = 0, ret2; 1468 int ret = 0, ret2;
1444 loff_t new_i_size; 1469 loff_t new_i_size;
1445 1470
1471 trace_mark(ext4_writeback_write_end,
1472 "dev %s ino %lu pos %llu len %u copied %u",
1473 inode->i_sb->s_id, inode->i_ino,
1474 (unsigned long long) pos, len, copied);
1446 new_i_size = pos + copied; 1475 new_i_size = pos + copied;
1447 if (new_i_size > EXT4_I(inode)->i_disksize) { 1476 if (new_i_size > EXT4_I(inode)->i_disksize) {
1448 ext4_update_i_disksize(inode, new_i_size); 1477 ext4_update_i_disksize(inode, new_i_size);
@@ -1478,6 +1507,10 @@ static int ext4_journalled_write_end(struct file *file,
1478 unsigned from, to; 1507 unsigned from, to;
1479 loff_t new_i_size; 1508 loff_t new_i_size;
1480 1509
1510 trace_mark(ext4_journalled_write_end,
1511 "dev %s ino %lu pos %llu len %u copied %u",
1512 inode->i_sb->s_id, inode->i_ino,
1513 (unsigned long long) pos, len, copied);
1481 from = pos & (PAGE_CACHE_SIZE - 1); 1514 from = pos & (PAGE_CACHE_SIZE - 1);
1482 to = from + len; 1515 to = from + len;
1483 1516
@@ -1624,7 +1657,7 @@ struct mpage_da_data {
1624 get_block_t *get_block; 1657 get_block_t *get_block;
1625 struct writeback_control *wbc; 1658 struct writeback_control *wbc;
1626 int io_done; 1659 int io_done;
1627 long pages_written; 1660 int pages_written;
1628 int retval; 1661 int retval;
1629}; 1662};
1630 1663
@@ -1644,35 +1677,39 @@ struct mpage_da_data {
1644 */ 1677 */
1645static int mpage_da_submit_io(struct mpage_da_data *mpd) 1678static int mpage_da_submit_io(struct mpage_da_data *mpd)
1646{ 1679{
1647 struct address_space *mapping = mpd->inode->i_mapping;
1648 int ret = 0, err, nr_pages, i;
1649 unsigned long index, end;
1650 struct pagevec pvec;
1651 long pages_skipped; 1680 long pages_skipped;
1681 struct pagevec pvec;
1682 unsigned long index, end;
1683 int ret = 0, err, nr_pages, i;
1684 struct inode *inode = mpd->inode;
1685 struct address_space *mapping = inode->i_mapping;
1652 1686
1653 BUG_ON(mpd->next_page <= mpd->first_page); 1687 BUG_ON(mpd->next_page <= mpd->first_page);
1654 pagevec_init(&pvec, 0); 1688 /*
1689 * We need to start from the first_page to the next_page - 1
1690 * to make sure we also write the mapped dirty buffer_heads.
1691 * If we look at mpd->lbh.b_blocknr we would only be looking
1692 * at the currently mapped buffer_heads.
1693 */
1655 index = mpd->first_page; 1694 index = mpd->first_page;
1656 end = mpd->next_page - 1; 1695 end = mpd->next_page - 1;
1657 1696
1697 pagevec_init(&pvec, 0);
1658 while (index <= end) { 1698 while (index <= end) {
1659 /* 1699 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
1660 * We can use PAGECACHE_TAG_DIRTY lookup here because
1661 * even though we have cleared the dirty flag on the page
1662 * We still keep the page in the radix tree with tag
1663 * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
1664 * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
1665 * which is called via the below writepage callback.
1666 */
1667 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1668 PAGECACHE_TAG_DIRTY,
1669 min(end - index,
1670 (pgoff_t)PAGEVEC_SIZE-1) + 1);
1671 if (nr_pages == 0) 1700 if (nr_pages == 0)
1672 break; 1701 break;
1673 for (i = 0; i < nr_pages; i++) { 1702 for (i = 0; i < nr_pages; i++) {
1674 struct page *page = pvec.pages[i]; 1703 struct page *page = pvec.pages[i];
1675 1704
1705 index = page->index;
1706 if (index > end)
1707 break;
1708 index++;
1709
1710 BUG_ON(!PageLocked(page));
1711 BUG_ON(PageWriteback(page));
1712
1676 pages_skipped = mpd->wbc->pages_skipped; 1713 pages_skipped = mpd->wbc->pages_skipped;
1677 err = mapping->a_ops->writepage(page, mpd->wbc); 1714 err = mapping->a_ops->writepage(page, mpd->wbc);
1678 if (!err && (pages_skipped == mpd->wbc->pages_skipped)) 1715 if (!err && (pages_skipped == mpd->wbc->pages_skipped))
@@ -1830,13 +1867,13 @@ static void ext4_print_free_blocks(struct inode *inode)
1830 ext4_count_free_blocks(inode->i_sb)); 1867 ext4_count_free_blocks(inode->i_sb));
1831 printk(KERN_EMERG "Free/Dirty block details\n"); 1868 printk(KERN_EMERG "Free/Dirty block details\n");
1832 printk(KERN_EMERG "free_blocks=%lld\n", 1869 printk(KERN_EMERG "free_blocks=%lld\n",
1833 percpu_counter_sum(&sbi->s_freeblocks_counter)); 1870 (long long)percpu_counter_sum(&sbi->s_freeblocks_counter));
1834 printk(KERN_EMERG "dirty_blocks=%lld\n", 1871 printk(KERN_EMERG "dirty_blocks=%lld\n",
1835 percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 1872 (long long)percpu_counter_sum(&sbi->s_dirtyblocks_counter));
1836 printk(KERN_EMERG "Block reservation details\n"); 1873 printk(KERN_EMERG "Block reservation details\n");
1837 printk(KERN_EMERG "i_reserved_data_blocks=%lu\n", 1874 printk(KERN_EMERG "i_reserved_data_blocks=%u\n",
1838 EXT4_I(inode)->i_reserved_data_blocks); 1875 EXT4_I(inode)->i_reserved_data_blocks);
1839 printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n", 1876 printk(KERN_EMERG "i_reserved_meta_blocks=%u\n",
1840 EXT4_I(inode)->i_reserved_meta_blocks); 1877 EXT4_I(inode)->i_reserved_meta_blocks);
1841 return; 1878 return;
1842} 1879}
@@ -2086,11 +2123,29 @@ static int __mpage_da_writepage(struct page *page,
2086 bh = head; 2123 bh = head;
2087 do { 2124 do {
2088 BUG_ON(buffer_locked(bh)); 2125 BUG_ON(buffer_locked(bh));
2126 /*
2127 * We need to try to allocate
2128 * unmapped blocks in the same page.
2129 * Otherwise we won't make progress
2130 * with the page in ext4_da_writepage
2131 */
2089 if (buffer_dirty(bh) && 2132 if (buffer_dirty(bh) &&
2090 (!buffer_mapped(bh) || buffer_delay(bh))) { 2133 (!buffer_mapped(bh) || buffer_delay(bh))) {
2091 mpage_add_bh_to_extent(mpd, logical, bh); 2134 mpage_add_bh_to_extent(mpd, logical, bh);
2092 if (mpd->io_done) 2135 if (mpd->io_done)
2093 return MPAGE_DA_EXTENT_TAIL; 2136 return MPAGE_DA_EXTENT_TAIL;
2137 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
2138 /*
2139 * mapped dirty buffer. We need to update
2140 * the b_state because we look at
2141 * b_state in mpage_da_map_blocks. We don't
2142 * update b_size because if we find an
2143 * unmapped buffer_head later we need to
2144 * use the b_state flag of that buffer_head.
2145 */
2146 if (mpd->lbh.b_size == 0)
2147 mpd->lbh.b_state =
2148 bh->b_state & BH_FLAGS;
2094 } 2149 }
2095 logical++; 2150 logical++;
2096 } while ((bh = bh->b_this_page) != head); 2151 } while ((bh = bh->b_this_page) != head);
@@ -2268,10 +2323,13 @@ static int ext4_da_writepage(struct page *page,
2268{ 2323{
2269 int ret = 0; 2324 int ret = 0;
2270 loff_t size; 2325 loff_t size;
2271 unsigned long len; 2326 unsigned int len;
2272 struct buffer_head *page_bufs; 2327 struct buffer_head *page_bufs;
2273 struct inode *inode = page->mapping->host; 2328 struct inode *inode = page->mapping->host;
2274 2329
2330 trace_mark(ext4_da_writepage,
2331 "dev %s ino %lu page_index %lu",
2332 inode->i_sb->s_id, inode->i_ino, page->index);
2275 size = i_size_read(inode); 2333 size = i_size_read(inode);
2276 if (page->index == size >> PAGE_CACHE_SHIFT) 2334 if (page->index == size >> PAGE_CACHE_SHIFT)
2277 len = size & ~PAGE_CACHE_MASK; 2335 len = size & ~PAGE_CACHE_MASK;
@@ -2377,10 +2435,25 @@ static int ext4_da_writepages(struct address_space *mapping,
2377 struct mpage_da_data mpd; 2435 struct mpage_da_data mpd;
2378 struct inode *inode = mapping->host; 2436 struct inode *inode = mapping->host;
2379 int no_nrwrite_index_update; 2437 int no_nrwrite_index_update;
2380 long pages_written = 0, pages_skipped; 2438 int pages_written = 0;
2439 long pages_skipped;
2381 int needed_blocks, ret = 0, nr_to_writebump = 0; 2440 int needed_blocks, ret = 0, nr_to_writebump = 0;
2382 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2441 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2383 2442
2443 trace_mark(ext4_da_writepages,
2444 "dev %s ino %lu nr_t_write %ld "
2445 "pages_skipped %ld range_start %llu "
2446 "range_end %llu nonblocking %d "
2447 "for_kupdate %d for_reclaim %d "
2448 "for_writepages %d range_cyclic %d",
2449 inode->i_sb->s_id, inode->i_ino,
2450 wbc->nr_to_write, wbc->pages_skipped,
2451 (unsigned long long) wbc->range_start,
2452 (unsigned long long) wbc->range_end,
2453 wbc->nonblocking, wbc->for_kupdate,
2454 wbc->for_reclaim, wbc->for_writepages,
2455 wbc->range_cyclic);
2456
2384 /* 2457 /*
2385 * No pages to write? This is mainly a kludge to avoid starting 2458 * No pages to write? This is mainly a kludge to avoid starting
2386 * a transaction for special inodes like journal inode on last iput() 2459 * a transaction for special inodes like journal inode on last iput()
@@ -2388,6 +2461,20 @@ static int ext4_da_writepages(struct address_space *mapping,
2388 */ 2461 */
2389 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) 2462 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
2390 return 0; 2463 return 0;
2464
2465 /*
2466 * If the filesystem has aborted, it is read-only, so return
2467 * right away instead of dumping stack traces later on that
2468 * will obscure the real source of the problem. We test
2469 * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because
2470 * the latter could be true if the filesystem is mounted
2471 * read-only, and in that case, ext4_da_writepages should
2472 * *never* be called, so if that ever happens, we would want
2473 * the stack trace.
2474 */
2475 if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT))
2476 return -EROFS;
2477
2391 /* 2478 /*
2392 * Make sure nr_to_write is >= sbi->s_mb_stream_request 2479 * Make sure nr_to_write is >= sbi->s_mb_stream_request
2393 * This make sure small files blocks are allocated in 2480 * This make sure small files blocks are allocated in
@@ -2432,7 +2519,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2432 handle = ext4_journal_start(inode, needed_blocks); 2519 handle = ext4_journal_start(inode, needed_blocks);
2433 if (IS_ERR(handle)) { 2520 if (IS_ERR(handle)) {
2434 ret = PTR_ERR(handle); 2521 ret = PTR_ERR(handle);
2435 printk(KERN_EMERG "%s: jbd2_start: " 2522 printk(KERN_CRIT "%s: jbd2_start: "
2436 "%ld pages, ino %lu; err %d\n", __func__, 2523 "%ld pages, ino %lu; err %d\n", __func__,
2437 wbc->nr_to_write, inode->i_ino, ret); 2524 wbc->nr_to_write, inode->i_ino, ret);
2438 dump_stack(); 2525 dump_stack();
@@ -2485,6 +2572,14 @@ out_writepages:
2485 if (!no_nrwrite_index_update) 2572 if (!no_nrwrite_index_update)
2486 wbc->no_nrwrite_index_update = 0; 2573 wbc->no_nrwrite_index_update = 0;
2487 wbc->nr_to_write -= nr_to_writebump; 2574 wbc->nr_to_write -= nr_to_writebump;
2575 trace_mark(ext4_da_writepage_result,
2576 "dev %s ino %lu ret %d pages_written %d "
2577 "pages_skipped %ld congestion %d "
2578 "more_io %d no_nrwrite_index_update %d",
2579 inode->i_sb->s_id, inode->i_ino, ret,
2580 pages_written, wbc->pages_skipped,
2581 wbc->encountered_congestion, wbc->more_io,
2582 wbc->no_nrwrite_index_update);
2488 return ret; 2583 return ret;
2489} 2584}
2490 2585
@@ -2497,7 +2592,7 @@ static int ext4_nonda_switch(struct super_block *sb)
2497 /* 2592 /*
2498 * switch to non delalloc mode if we are running low 2593 * switch to non delalloc mode if we are running low
2499 * on free block. The free block accounting via percpu 2594 * on free block. The free block accounting via percpu
2500 * counters can get slightly wrong with FBC_BATCH getting 2595 * counters can get slightly wrong with percpu_counter_batch getting
2501 * accumulated on each CPU without updating global counters 2596 * accumulated on each CPU without updating global counters
2502 * Delalloc need an accurate free block accounting. So switch 2597 * Delalloc need an accurate free block accounting. So switch
2503 * to non delalloc when we are near to error range. 2598 * to non delalloc when we are near to error range.
@@ -2536,6 +2631,11 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2536 len, flags, pagep, fsdata); 2631 len, flags, pagep, fsdata);
2537 } 2632 }
2538 *fsdata = (void *)0; 2633 *fsdata = (void *)0;
2634
2635 trace_mark(ext4_da_write_begin,
2636 "dev %s ino %lu pos %llu len %u flags %u",
2637 inode->i_sb->s_id, inode->i_ino,
2638 (unsigned long long) pos, len, flags);
2539retry: 2639retry:
2540 /* 2640 /*
2541 * With delayed allocation, we don't log the i_disksize update 2641 * With delayed allocation, we don't log the i_disksize update
@@ -2549,7 +2649,7 @@ retry:
2549 goto out; 2649 goto out;
2550 } 2650 }
2551 2651
2552 page = __grab_cache_page(mapping, index); 2652 page = grab_cache_page_write_begin(mapping, index, flags);
2553 if (!page) { 2653 if (!page) {
2554 ext4_journal_stop(handle); 2654 ext4_journal_stop(handle);
2555 ret = -ENOMEM; 2655 ret = -ENOMEM;
@@ -2625,6 +2725,10 @@ static int ext4_da_write_end(struct file *file,
2625 } 2725 }
2626 } 2726 }
2627 2727
2728 trace_mark(ext4_da_write_end,
2729 "dev %s ino %lu pos %llu len %u copied %u",
2730 inode->i_sb->s_id, inode->i_ino,
2731 (unsigned long long) pos, len, copied);
2628 start = pos & (PAGE_CACHE_SIZE - 1); 2732 start = pos & (PAGE_CACHE_SIZE - 1);
2629 end = start + copied - 1; 2733 end = start + copied - 1;
2630 2734
@@ -2717,7 +2821,10 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
2717 filemap_write_and_wait(mapping); 2821 filemap_write_and_wait(mapping);
2718 } 2822 }
2719 2823
2720 if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { 2824 BUG_ON(!EXT4_JOURNAL(inode) &&
2825 EXT4_I(inode)->i_state & EXT4_STATE_JDATA);
2826
2827 if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
2721 /* 2828 /*
2722 * This is a REALLY heavyweight approach, but the use of 2829 * This is a REALLY heavyweight approach, but the use of
2723 * bmap on dirty files is expected to be extremely rare: 2830 * bmap on dirty files is expected to be extremely rare:
@@ -2835,6 +2942,9 @@ static int ext4_normal_writepage(struct page *page,
2835 loff_t size = i_size_read(inode); 2942 loff_t size = i_size_read(inode);
2836 loff_t len; 2943 loff_t len;
2837 2944
2945 trace_mark(ext4_normal_writepage,
2946 "dev %s ino %lu page_index %lu",
2947 inode->i_sb->s_id, inode->i_ino, page->index);
2838 J_ASSERT(PageLocked(page)); 2948 J_ASSERT(PageLocked(page));
2839 if (page->index == size >> PAGE_CACHE_SHIFT) 2949 if (page->index == size >> PAGE_CACHE_SHIFT)
2840 len = size & ~PAGE_CACHE_MASK; 2950 len = size & ~PAGE_CACHE_MASK;
@@ -2920,6 +3030,9 @@ static int ext4_journalled_writepage(struct page *page,
2920 loff_t size = i_size_read(inode); 3030 loff_t size = i_size_read(inode);
2921 loff_t len; 3031 loff_t len;
2922 3032
3033 trace_mark(ext4_journalled_writepage,
3034 "dev %s ino %lu page_index %lu",
3035 inode->i_sb->s_id, inode->i_ino, page->index);
2923 J_ASSERT(PageLocked(page)); 3036 J_ASSERT(PageLocked(page));
2924 if (page->index == size >> PAGE_CACHE_SHIFT) 3037 if (page->index == size >> PAGE_CACHE_SHIFT)
2925 len = size & ~PAGE_CACHE_MASK; 3038 len = size & ~PAGE_CACHE_MASK;
@@ -2988,7 +3101,10 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
2988 if (offset == 0) 3101 if (offset == 0)
2989 ClearPageChecked(page); 3102 ClearPageChecked(page);
2990 3103
2991 jbd2_journal_invalidatepage(journal, page, offset); 3104 if (journal)
3105 jbd2_journal_invalidatepage(journal, page, offset);
3106 else
3107 block_invalidatepage(page, offset);
2992} 3108}
2993 3109
2994static int ext4_releasepage(struct page *page, gfp_t wait) 3110static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -2998,7 +3114,10 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
2998 WARN_ON(PageChecked(page)); 3114 WARN_ON(PageChecked(page));
2999 if (!page_has_buffers(page)) 3115 if (!page_has_buffers(page))
3000 return 0; 3116 return 0;
3001 return jbd2_journal_try_to_free_buffers(journal, page, wait); 3117 if (journal)
3118 return jbd2_journal_try_to_free_buffers(journal, page, wait);
3119 else
3120 return try_to_free_buffers(page);
3002} 3121}
3003 3122
3004/* 3123/*
@@ -3270,7 +3389,7 @@ int ext4_block_truncate_page(handle_t *handle,
3270 3389
3271 err = 0; 3390 err = 0;
3272 if (ext4_should_journal_data(inode)) { 3391 if (ext4_should_journal_data(inode)) {
3273 err = ext4_journal_dirty_metadata(handle, bh); 3392 err = ext4_handle_dirty_metadata(handle, inode, bh);
3274 } else { 3393 } else {
3275 if (ext4_should_order_data(inode)) 3394 if (ext4_should_order_data(inode))
3276 err = ext4_jbd2_file_inode(handle, inode); 3395 err = ext4_jbd2_file_inode(handle, inode);
@@ -3394,8 +3513,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3394 __le32 *p; 3513 __le32 *p;
3395 if (try_to_extend_transaction(handle, inode)) { 3514 if (try_to_extend_transaction(handle, inode)) {
3396 if (bh) { 3515 if (bh) {
3397 BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); 3516 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
3398 ext4_journal_dirty_metadata(handle, bh); 3517 ext4_handle_dirty_metadata(handle, inode, bh);
3399 } 3518 }
3400 ext4_mark_inode_dirty(handle, inode); 3519 ext4_mark_inode_dirty(handle, inode);
3401 ext4_journal_test_restart(handle, inode); 3520 ext4_journal_test_restart(handle, inode);
@@ -3495,7 +3614,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
3495 count, block_to_free_p, p); 3614 count, block_to_free_p, p);
3496 3615
3497 if (this_bh) { 3616 if (this_bh) {
3498 BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); 3617 BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
3499 3618
3500 /* 3619 /*
3501 * The buffer head should have an attached journal head at this 3620 * The buffer head should have an attached journal head at this
@@ -3504,7 +3623,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
3504 * the block was cleared. Check for this instead of OOPSing. 3623 * the block was cleared. Check for this instead of OOPSing.
3505 */ 3624 */
3506 if (bh2jh(this_bh)) 3625 if (bh2jh(this_bh))
3507 ext4_journal_dirty_metadata(handle, this_bh); 3626 ext4_handle_dirty_metadata(handle, inode, this_bh);
3508 else 3627 else
3509 ext4_error(inode->i_sb, __func__, 3628 ext4_error(inode->i_sb, __func__,
3510 "circular indirect block detected, " 3629 "circular indirect block detected, "
@@ -3534,7 +3653,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
3534 ext4_fsblk_t nr; 3653 ext4_fsblk_t nr;
3535 __le32 *p; 3654 __le32 *p;
3536 3655
3537 if (is_handle_aborted(handle)) 3656 if (ext4_handle_is_aborted(handle))
3538 return; 3657 return;
3539 3658
3540 if (depth--) { 3659 if (depth--) {
@@ -3604,7 +3723,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
3604 * will merely complain about releasing a free block, 3723 * will merely complain about releasing a free block,
3605 * rather than leaking blocks. 3724 * rather than leaking blocks.
3606 */ 3725 */
3607 if (is_handle_aborted(handle)) 3726 if (ext4_handle_is_aborted(handle))
3608 return; 3727 return;
3609 if (try_to_extend_transaction(handle, inode)) { 3728 if (try_to_extend_transaction(handle, inode)) {
3610 ext4_mark_inode_dirty(handle, inode); 3729 ext4_mark_inode_dirty(handle, inode);
@@ -3623,9 +3742,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
3623 parent_bh)){ 3742 parent_bh)){
3624 *p = 0; 3743 *p = 0;
3625 BUFFER_TRACE(parent_bh, 3744 BUFFER_TRACE(parent_bh,
3626 "call ext4_journal_dirty_metadata"); 3745 "call ext4_handle_dirty_metadata");
3627 ext4_journal_dirty_metadata(handle, 3746 ext4_handle_dirty_metadata(handle,
3628 parent_bh); 3747 inode,
3748 parent_bh);
3629 } 3749 }
3630 } 3750 }
3631 } 3751 }
@@ -3813,7 +3933,7 @@ do_indirects:
3813 * synchronous 3933 * synchronous
3814 */ 3934 */
3815 if (IS_SYNC(inode)) 3935 if (IS_SYNC(inode))
3816 handle->h_sync = 1; 3936 ext4_handle_sync(handle);
3817out_stop: 3937out_stop:
3818 /* 3938 /*
3819 * If this was a simple ftruncate(), and the file will remain alive 3939 * If this was a simple ftruncate(), and the file will remain alive
@@ -3843,7 +3963,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
3843 ext4_fsblk_t block; 3963 ext4_fsblk_t block;
3844 int inodes_per_block, inode_offset; 3964 int inodes_per_block, inode_offset;
3845 3965
3846 iloc->bh = 0; 3966 iloc->bh = NULL;
3847 if (!ext4_valid_inum(sb, inode->i_ino)) 3967 if (!ext4_valid_inum(sb, inode->i_ino))
3848 return -EIO; 3968 return -EIO;
3849 3969
@@ -3950,7 +4070,7 @@ make_io:
3950 num = EXT4_INODES_PER_GROUP(sb); 4070 num = EXT4_INODES_PER_GROUP(sb);
3951 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 4071 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3952 EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) 4072 EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
3953 num -= le16_to_cpu(gdp->bg_itable_unused); 4073 num -= ext4_itable_unused_count(sb, gdp);
3954 table += num / inodes_per_block; 4074 table += num / inodes_per_block;
3955 if (end > table) 4075 if (end > table)
3956 end = table; 4076 end = table;
@@ -4164,9 +4284,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4164 inode->i_op = &ext4_dir_inode_operations; 4284 inode->i_op = &ext4_dir_inode_operations;
4165 inode->i_fop = &ext4_dir_operations; 4285 inode->i_fop = &ext4_dir_operations;
4166 } else if (S_ISLNK(inode->i_mode)) { 4286 } else if (S_ISLNK(inode->i_mode)) {
4167 if (ext4_inode_is_fast_symlink(inode)) 4287 if (ext4_inode_is_fast_symlink(inode)) {
4168 inode->i_op = &ext4_fast_symlink_inode_operations; 4288 inode->i_op = &ext4_fast_symlink_inode_operations;
4169 else { 4289 nd_terminate_link(ei->i_data, inode->i_size,
4290 sizeof(ei->i_data) - 1);
4291 } else {
4170 inode->i_op = &ext4_symlink_inode_operations; 4292 inode->i_op = &ext4_symlink_inode_operations;
4171 ext4_set_aops(inode); 4293 ext4_set_aops(inode);
4172 } 4294 }
@@ -4310,8 +4432,8 @@ static int ext4_do_update_inode(handle_t *handle,
4310 EXT4_SET_RO_COMPAT_FEATURE(sb, 4432 EXT4_SET_RO_COMPAT_FEATURE(sb,
4311 EXT4_FEATURE_RO_COMPAT_LARGE_FILE); 4433 EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
4312 sb->s_dirt = 1; 4434 sb->s_dirt = 1;
4313 handle->h_sync = 1; 4435 ext4_handle_sync(handle);
4314 err = ext4_journal_dirty_metadata(handle, 4436 err = ext4_handle_dirty_metadata(handle, inode,
4315 EXT4_SB(sb)->s_sbh); 4437 EXT4_SB(sb)->s_sbh);
4316 } 4438 }
4317 } 4439 }
@@ -4338,9 +4460,8 @@ static int ext4_do_update_inode(handle_t *handle,
4338 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 4460 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
4339 } 4461 }
4340 4462
4341 4463 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
4342 BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); 4464 rc = ext4_handle_dirty_metadata(handle, inode, bh);
4343 rc = ext4_journal_dirty_metadata(handle, bh);
4344 if (!err) 4465 if (!err)
4345 err = rc; 4466 err = rc;
4346 ei->i_state &= ~EXT4_STATE_NEW; 4467 ei->i_state &= ~EXT4_STATE_NEW;
@@ -4403,6 +4524,25 @@ int ext4_write_inode(struct inode *inode, int wait)
4403 return ext4_force_commit(inode->i_sb); 4524 return ext4_force_commit(inode->i_sb);
4404} 4525}
4405 4526
4527int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
4528{
4529 int err = 0;
4530
4531 mark_buffer_dirty(bh);
4532 if (inode && inode_needs_sync(inode)) {
4533 sync_dirty_buffer(bh);
4534 if (buffer_req(bh) && !buffer_uptodate(bh)) {
4535 ext4_error(inode->i_sb, __func__,
4536 "IO error syncing inode, "
4537 "inode=%lu, block=%llu",
4538 inode->i_ino,
4539 (unsigned long long)bh->b_blocknr);
4540 err = -EIO;
4541 }
4542 }
4543 return err;
4544}
4545
4406/* 4546/*
4407 * ext4_setattr() 4547 * ext4_setattr()
4408 * 4548 *
@@ -4707,16 +4847,15 @@ int
4707ext4_reserve_inode_write(handle_t *handle, struct inode *inode, 4847ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
4708 struct ext4_iloc *iloc) 4848 struct ext4_iloc *iloc)
4709{ 4849{
4710 int err = 0; 4850 int err;
4711 if (handle) { 4851
4712 err = ext4_get_inode_loc(inode, iloc); 4852 err = ext4_get_inode_loc(inode, iloc);
4713 if (!err) { 4853 if (!err) {
4714 BUFFER_TRACE(iloc->bh, "get_write_access"); 4854 BUFFER_TRACE(iloc->bh, "get_write_access");
4715 err = ext4_journal_get_write_access(handle, iloc->bh); 4855 err = ext4_journal_get_write_access(handle, iloc->bh);
4716 if (err) { 4856 if (err) {
4717 brelse(iloc->bh); 4857 brelse(iloc->bh);
4718 iloc->bh = NULL; 4858 iloc->bh = NULL;
4719 }
4720 } 4859 }
4721 } 4860 }
4722 ext4_std_error(inode->i_sb, err); 4861 ext4_std_error(inode->i_sb, err);
@@ -4788,7 +4927,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
4788 4927
4789 might_sleep(); 4928 might_sleep();
4790 err = ext4_reserve_inode_write(handle, inode, &iloc); 4929 err = ext4_reserve_inode_write(handle, inode, &iloc);
4791 if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && 4930 if (ext4_handle_valid(handle) &&
4931 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
4792 !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { 4932 !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) {
4793 /* 4933 /*
4794 * We need extra buffer credits since we may write into EA block 4934 * We need extra buffer credits since we may write into EA block
@@ -4840,6 +4980,11 @@ void ext4_dirty_inode(struct inode *inode)
4840 handle_t *current_handle = ext4_journal_current_handle(); 4980 handle_t *current_handle = ext4_journal_current_handle();
4841 handle_t *handle; 4981 handle_t *handle;
4842 4982
4983 if (!ext4_handle_valid(current_handle)) {
4984 ext4_mark_inode_dirty(current_handle, inode);
4985 return;
4986 }
4987
4843 handle = ext4_journal_start(inode, 2); 4988 handle = ext4_journal_start(inode, 2);
4844 if (IS_ERR(handle)) 4989 if (IS_ERR(handle))
4845 goto out; 4990 goto out;
@@ -4877,8 +5022,9 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode)
4877 BUFFER_TRACE(iloc.bh, "get_write_access"); 5022 BUFFER_TRACE(iloc.bh, "get_write_access");
4878 err = jbd2_journal_get_write_access(handle, iloc.bh); 5023 err = jbd2_journal_get_write_access(handle, iloc.bh);
4879 if (!err) 5024 if (!err)
4880 err = ext4_journal_dirty_metadata(handle, 5025 err = ext4_handle_dirty_metadata(handle,
4881 iloc.bh); 5026 inode,
5027 iloc.bh);
4882 brelse(iloc.bh); 5028 brelse(iloc.bh);
4883 } 5029 }
4884 } 5030 }
@@ -4904,6 +5050,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4904 */ 5050 */
4905 5051
4906 journal = EXT4_JOURNAL(inode); 5052 journal = EXT4_JOURNAL(inode);
5053 if (!journal)
5054 return 0;
4907 if (is_journal_aborted(journal)) 5055 if (is_journal_aborted(journal))
4908 return -EROFS; 5056 return -EROFS;
4909 5057
@@ -4933,7 +5081,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4933 return PTR_ERR(handle); 5081 return PTR_ERR(handle);
4934 5082
4935 err = ext4_mark_inode_dirty(handle, inode); 5083 err = ext4_mark_inode_dirty(handle, inode);
4936 handle->h_sync = 1; 5084 ext4_handle_sync(handle);
4937 ext4_journal_stop(handle); 5085 ext4_journal_stop(handle);
4938 ext4_std_error(inode->i_sb, err); 5086 ext4_std_error(inode->i_sb, err);
4939 5087
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index dc99b4776d58..42dc83fb247a 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -99,7 +99,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
99 goto flags_out; 99 goto flags_out;
100 } 100 }
101 if (IS_SYNC(inode)) 101 if (IS_SYNC(inode))
102 handle->h_sync = 1; 102 ext4_handle_sync(handle);
103 err = ext4_reserve_inode_write(handle, inode, &iloc); 103 err = ext4_reserve_inode_write(handle, inode, &iloc);
104 if (err) 104 if (err)
105 goto flags_err; 105 goto flags_err;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 444ad998f72e..918aec0c8a11 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -100,7 +100,7 @@
100 * inode as: 100 * inode as:
101 * 101 *
102 * { page } 102 * { page }
103 * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]... 103 * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
104 * 104 *
105 * 105 *
106 * one block each for bitmap and buddy information. So for each group we 106 * one block each for bitmap and buddy information. So for each group we
@@ -330,6 +330,18 @@
330 * object 330 * object
331 * 331 *
332 */ 332 */
333static struct kmem_cache *ext4_pspace_cachep;
334static struct kmem_cache *ext4_ac_cachep;
335static struct kmem_cache *ext4_free_ext_cachep;
336static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
337 ext4_group_t group);
338static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
339 ext4_group_t group);
340static int ext4_mb_init_per_dev_proc(struct super_block *sb);
341static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
342static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
343
344
333 345
334static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 346static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
335{ 347{
@@ -445,9 +457,9 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
445 blocknr += first + i; 457 blocknr += first + i;
446 blocknr += 458 blocknr +=
447 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 459 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
448 460 ext4_grp_locked_error(sb, e4b->bd_group,
449 ext4_error(sb, __func__, "double-free of inode" 461 __func__, "double-free of inode"
450 " %lu's block %llu(bit %u in group %lu)\n", 462 " %lu's block %llu(bit %u in group %u)",
451 inode ? inode->i_ino : 0, blocknr, 463 inode ? inode->i_ino : 0, blocknr,
452 first + i, e4b->bd_group); 464 first + i, e4b->bd_group);
453 } 465 }
@@ -477,7 +489,7 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
477 b2 = (unsigned char *) bitmap; 489 b2 = (unsigned char *) bitmap;
478 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { 490 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
479 if (b1[i] != b2[i]) { 491 if (b1[i] != b2[i]) {
480 printk(KERN_ERR "corruption in group %lu " 492 printk(KERN_ERR "corruption in group %u "
481 "at byte %u(%u): %x in copy != %x " 493 "at byte %u(%u): %x in copy != %x "
482 "on disk/prealloc\n", 494 "on disk/prealloc\n",
483 e4b->bd_group, i, i * 8, b1[i], b2[i]); 495 e4b->bd_group, i, i * 8, b1[i], b2[i]);
@@ -690,8 +702,8 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
690 grp->bb_fragments = fragments; 702 grp->bb_fragments = fragments;
691 703
692 if (free != grp->bb_free) { 704 if (free != grp->bb_free) {
693 ext4_error(sb, __func__, 705 ext4_grp_locked_error(sb, group, __func__,
694 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", 706 "EXT4-fs: group %u: %u blocks in bitmap, %u in gd",
695 group, free, grp->bb_free); 707 group, free, grp->bb_free);
696 /* 708 /*
697 * If we intent to continue, we consider group descritor 709 * If we intent to continue, we consider group descritor
@@ -716,7 +728,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
716 * stored in the inode as 728 * stored in the inode as
717 * 729 *
718 * { page } 730 * { page }
719 * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]... 731 * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
720 * 732 *
721 * 733 *
722 * one block each for bitmap and buddy information. 734 * one block each for bitmap and buddy information.
@@ -782,25 +794,45 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
782 if (bh[i] == NULL) 794 if (bh[i] == NULL)
783 goto out; 795 goto out;
784 796
785 if (buffer_uptodate(bh[i]) && 797 if (bitmap_uptodate(bh[i]))
786 !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
787 continue; 798 continue;
788 799
789 lock_buffer(bh[i]); 800 lock_buffer(bh[i]);
801 if (bitmap_uptodate(bh[i])) {
802 unlock_buffer(bh[i]);
803 continue;
804 }
790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 805 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 806 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
792 ext4_init_block_bitmap(sb, bh[i], 807 ext4_init_block_bitmap(sb, bh[i],
793 first_group + i, desc); 808 first_group + i, desc);
809 set_bitmap_uptodate(bh[i]);
794 set_buffer_uptodate(bh[i]); 810 set_buffer_uptodate(bh[i]);
795 unlock_buffer(bh[i]);
796 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 811 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
812 unlock_buffer(bh[i]);
797 continue; 813 continue;
798 } 814 }
799 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 815 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
816 if (buffer_uptodate(bh[i])) {
817 /*
818 * if not uninit if bh is uptodate,
819 * bitmap is also uptodate
820 */
821 set_bitmap_uptodate(bh[i]);
822 unlock_buffer(bh[i]);
823 continue;
824 }
800 get_bh(bh[i]); 825 get_bh(bh[i]);
826 /*
827 * submit the buffer_head for read. We can
828 * safely mark the bitmap as uptodate now.
829 * We do it here so the bitmap uptodate bit
830 * get set with buffer lock held.
831 */
832 set_bitmap_uptodate(bh[i]);
801 bh[i]->b_end_io = end_buffer_read_sync; 833 bh[i]->b_end_io = end_buffer_read_sync;
802 submit_bh(READ, bh[i]); 834 submit_bh(READ, bh[i]);
803 mb_debug("read bitmap for group %lu\n", first_group + i); 835 mb_debug("read bitmap for group %u\n", first_group + i);
804 } 836 }
805 837
806 /* wait for I/O completion */ 838 /* wait for I/O completion */
@@ -814,6 +846,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
814 846
815 err = 0; 847 err = 0;
816 first_block = page->index * blocks_per_page; 848 first_block = page->index * blocks_per_page;
849 /* init the page */
850 memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
817 for (i = 0; i < blocks_per_page; i++) { 851 for (i = 0; i < blocks_per_page; i++) {
818 int group; 852 int group;
819 struct ext4_group_info *grinfo; 853 struct ext4_group_info *grinfo;
@@ -840,7 +874,6 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
840 BUG_ON(incore == NULL); 874 BUG_ON(incore == NULL);
841 mb_debug("put buddy for group %u in page %lu/%x\n", 875 mb_debug("put buddy for group %u in page %lu/%x\n",
842 group, page->index, i * blocksize); 876 group, page->index, i * blocksize);
843 memset(data, 0xff, blocksize);
844 grinfo = ext4_get_group_info(sb, group); 877 grinfo = ext4_get_group_info(sb, group);
845 grinfo->bb_fragments = 0; 878 grinfo->bb_fragments = 0;
846 memset(grinfo->bb_counters, 0, 879 memset(grinfo->bb_counters, 0,
@@ -848,7 +881,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
848 /* 881 /*
849 * incore got set to the group block bitmap below 882 * incore got set to the group block bitmap below
850 */ 883 */
884 ext4_lock_group(sb, group);
851 ext4_mb_generate_buddy(sb, data, incore, group); 885 ext4_mb_generate_buddy(sb, data, incore, group);
886 ext4_unlock_group(sb, group);
852 incore = NULL; 887 incore = NULL;
853 } else { 888 } else {
854 /* this is block of bitmap */ 889 /* this is block of bitmap */
@@ -862,6 +897,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
862 897
863 /* mark all preallocated blks used in in-core bitmap */ 898 /* mark all preallocated blks used in in-core bitmap */
864 ext4_mb_generate_from_pa(sb, data, group); 899 ext4_mb_generate_from_pa(sb, data, group);
900 ext4_mb_generate_from_freelist(sb, data, group);
865 ext4_unlock_group(sb, group); 901 ext4_unlock_group(sb, group);
866 902
867 /* set incore so that the buddy information can be 903 /* set incore so that the buddy information can be
@@ -886,18 +922,20 @@ static noinline_for_stack int
886ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, 922ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
887 struct ext4_buddy *e4b) 923 struct ext4_buddy *e4b)
888{ 924{
889 struct ext4_sb_info *sbi = EXT4_SB(sb);
890 struct inode *inode = sbi->s_buddy_cache;
891 int blocks_per_page; 925 int blocks_per_page;
892 int block; 926 int block;
893 int pnum; 927 int pnum;
894 int poff; 928 int poff;
895 struct page *page; 929 struct page *page;
896 int ret; 930 int ret;
931 struct ext4_group_info *grp;
932 struct ext4_sb_info *sbi = EXT4_SB(sb);
933 struct inode *inode = sbi->s_buddy_cache;
897 934
898 mb_debug("load group %lu\n", group); 935 mb_debug("load group %u\n", group);
899 936
900 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; 937 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
938 grp = ext4_get_group_info(sb, group);
901 939
902 e4b->bd_blkbits = sb->s_blocksize_bits; 940 e4b->bd_blkbits = sb->s_blocksize_bits;
903 e4b->bd_info = ext4_get_group_info(sb, group); 941 e4b->bd_info = ext4_get_group_info(sb, group);
@@ -905,6 +943,15 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
905 e4b->bd_group = group; 943 e4b->bd_group = group;
906 e4b->bd_buddy_page = NULL; 944 e4b->bd_buddy_page = NULL;
907 e4b->bd_bitmap_page = NULL; 945 e4b->bd_bitmap_page = NULL;
946 e4b->alloc_semp = &grp->alloc_sem;
947
948 /* Take the read lock on the group alloc
949 * sem. This would make sure a parallel
950 * ext4_mb_init_group happening on other
951 * groups mapped by the page is blocked
952 * till we are done with allocation
953 */
954 down_read(e4b->alloc_semp);
908 955
909 /* 956 /*
910 * the buddy cache inode stores the block bitmap 957 * the buddy cache inode stores the block bitmap
@@ -920,6 +967,14 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
920 page = find_get_page(inode->i_mapping, pnum); 967 page = find_get_page(inode->i_mapping, pnum);
921 if (page == NULL || !PageUptodate(page)) { 968 if (page == NULL || !PageUptodate(page)) {
922 if (page) 969 if (page)
970 /*
971 * drop the page reference and try
972 * to get the page with lock. If we
973 * are not uptodate that implies
974 * somebody just created the page but
975 * is yet to initialize the same. So
976 * wait for it to initialize.
977 */
923 page_cache_release(page); 978 page_cache_release(page);
924 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); 979 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
925 if (page) { 980 if (page) {
@@ -985,6 +1040,9 @@ err:
985 page_cache_release(e4b->bd_buddy_page); 1040 page_cache_release(e4b->bd_buddy_page);
986 e4b->bd_buddy = NULL; 1041 e4b->bd_buddy = NULL;
987 e4b->bd_bitmap = NULL; 1042 e4b->bd_bitmap = NULL;
1043
1044 /* Done with the buddy cache */
1045 up_read(e4b->alloc_semp);
988 return ret; 1046 return ret;
989} 1047}
990 1048
@@ -994,6 +1052,9 @@ static void ext4_mb_release_desc(struct ext4_buddy *e4b)
994 page_cache_release(e4b->bd_bitmap_page); 1052 page_cache_release(e4b->bd_bitmap_page);
995 if (e4b->bd_buddy_page) 1053 if (e4b->bd_buddy_page)
996 page_cache_release(e4b->bd_buddy_page); 1054 page_cache_release(e4b->bd_buddy_page);
1055 /* Done with the buddy cache */
1056 if (e4b->alloc_semp)
1057 up_read(e4b->alloc_semp);
997} 1058}
998 1059
999 1060
@@ -1031,7 +1092,10 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
1031 cur += 32; 1092 cur += 32;
1032 continue; 1093 continue;
1033 } 1094 }
1034 mb_clear_bit_atomic(lock, cur, bm); 1095 if (lock)
1096 mb_clear_bit_atomic(lock, cur, bm);
1097 else
1098 mb_clear_bit(cur, bm);
1035 cur++; 1099 cur++;
1036 } 1100 }
1037} 1101}
@@ -1049,7 +1113,10 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
1049 cur += 32; 1113 cur += 32;
1050 continue; 1114 continue;
1051 } 1115 }
1052 mb_set_bit_atomic(lock, cur, bm); 1116 if (lock)
1117 mb_set_bit_atomic(lock, cur, bm);
1118 else
1119 mb_set_bit(cur, bm);
1053 cur++; 1120 cur++;
1054 } 1121 }
1055} 1122}
@@ -1094,12 +1161,11 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1094 blocknr += block; 1161 blocknr += block;
1095 blocknr += 1162 blocknr +=
1096 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 1163 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1097 ext4_unlock_group(sb, e4b->bd_group); 1164 ext4_grp_locked_error(sb, e4b->bd_group,
1098 ext4_error(sb, __func__, "double-free of inode" 1165 __func__, "double-free of inode"
1099 " %lu's block %llu(bit %u in group %lu)\n", 1166 " %lu's block %llu(bit %u in group %u)",
1100 inode ? inode->i_ino : 0, blocknr, block, 1167 inode ? inode->i_ino : 0, blocknr, block,
1101 e4b->bd_group); 1168 e4b->bd_group);
1102 ext4_lock_group(sb, e4b->bd_group);
1103 } 1169 }
1104 mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); 1170 mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
1105 e4b->bd_info->bb_counters[order]++; 1171 e4b->bd_info->bb_counters[order]++;
@@ -1296,13 +1362,20 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1296 ac->ac_tail = ret & 0xffff; 1362 ac->ac_tail = ret & 0xffff;
1297 ac->ac_buddy = ret >> 16; 1363 ac->ac_buddy = ret >> 16;
1298 1364
1299 /* XXXXXXX: SUCH A HORRIBLE **CK */ 1365 /*
1300 /*FIXME!! Why ? */ 1366 * take the page reference. We want the page to be pinned
1367 * so that we don't get a ext4_mb_init_cache_call for this
1368 * group until we update the bitmap. That would mean we
1369 * double allocate blocks. The reference is dropped
1370 * in ext4_mb_release_context
1371 */
1301 ac->ac_bitmap_page = e4b->bd_bitmap_page; 1372 ac->ac_bitmap_page = e4b->bd_bitmap_page;
1302 get_page(ac->ac_bitmap_page); 1373 get_page(ac->ac_bitmap_page);
1303 ac->ac_buddy_page = e4b->bd_buddy_page; 1374 ac->ac_buddy_page = e4b->bd_buddy_page;
1304 get_page(ac->ac_buddy_page); 1375 get_page(ac->ac_buddy_page);
1305 1376 /* on allocation we use ac to track the held semaphore */
1377 ac->alloc_semp = e4b->alloc_semp;
1378 e4b->alloc_semp = NULL;
1306 /* store last allocated for subsequent stream allocation */ 1379 /* store last allocated for subsequent stream allocation */
1307 if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { 1380 if ((ac->ac_flags & EXT4_MB_HINT_DATA)) {
1308 spin_lock(&sbi->s_md_lock); 1381 spin_lock(&sbi->s_md_lock);
@@ -1326,6 +1399,8 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
1326 struct ext4_free_extent ex; 1399 struct ext4_free_extent ex;
1327 int max; 1400 int max;
1328 1401
1402 if (ac->ac_status == AC_STATUS_FOUND)
1403 return;
1329 /* 1404 /*
1330 * We don't want to scan for a whole year 1405 * We don't want to scan for a whole year
1331 */ 1406 */
@@ -1575,8 +1650,9 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1575 * free blocks even though group info says we 1650 * free blocks even though group info says we
1576 * we have free blocks 1651 * we have free blocks
1577 */ 1652 */
1578 ext4_error(sb, __func__, "%d free blocks as per " 1653 ext4_grp_locked_error(sb, e4b->bd_group,
1579 "group info. But bitmap says 0\n", 1654 __func__, "%d free blocks as per "
1655 "group info. But bitmap says 0",
1580 free); 1656 free);
1581 break; 1657 break;
1582 } 1658 }
@@ -1584,8 +1660,9 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1584 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1660 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1585 BUG_ON(ex.fe_len <= 0); 1661 BUG_ON(ex.fe_len <= 0);
1586 if (free < ex.fe_len) { 1662 if (free < ex.fe_len) {
1587 ext4_error(sb, __func__, "%d free blocks as per " 1663 ext4_grp_locked_error(sb, e4b->bd_group,
1588 "group info. But got %d blocks\n", 1664 __func__, "%d free blocks as per "
1665 "group info. But got %d blocks",
1589 free, ex.fe_len); 1666 free, ex.fe_len);
1590 /* 1667 /*
1591 * The number of free blocks differs. This mostly 1668 * The number of free blocks differs. This mostly
@@ -1692,6 +1769,173 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1692 return 0; 1769 return 0;
1693} 1770}
1694 1771
1772/*
1773 * lock the group_info alloc_sem of all the groups
1774 * belonging to the same buddy cache page. This
1775 * make sure other parallel operation on the buddy
1776 * cache doesn't happen whild holding the buddy cache
1777 * lock
1778 */
1779int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1780{
1781 int i;
1782 int block, pnum;
1783 int blocks_per_page;
1784 int groups_per_page;
1785 ext4_group_t first_group;
1786 struct ext4_group_info *grp;
1787
1788 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1789 /*
1790 * the buddy cache inode stores the block bitmap
1791 * and buddy information in consecutive blocks.
1792 * So for each group we need two blocks.
1793 */
1794 block = group * 2;
1795 pnum = block / blocks_per_page;
1796 first_group = pnum * blocks_per_page / 2;
1797
1798 groups_per_page = blocks_per_page >> 1;
1799 if (groups_per_page == 0)
1800 groups_per_page = 1;
1801 /* read all groups the page covers into the cache */
1802 for (i = 0; i < groups_per_page; i++) {
1803
1804 if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
1805 break;
1806 grp = ext4_get_group_info(sb, first_group + i);
1807 /* take all groups write allocation
1808 * semaphore. This make sure there is
1809 * no block allocation going on in any
1810 * of that groups
1811 */
1812 down_write_nested(&grp->alloc_sem, i);
1813 }
1814 return i;
1815}
1816
1817void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
1818 ext4_group_t group, int locked_group)
1819{
1820 int i;
1821 int block, pnum;
1822 int blocks_per_page;
1823 ext4_group_t first_group;
1824 struct ext4_group_info *grp;
1825
1826 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1827 /*
1828 * the buddy cache inode stores the block bitmap
1829 * and buddy information in consecutive blocks.
1830 * So for each group we need two blocks.
1831 */
1832 block = group * 2;
1833 pnum = block / blocks_per_page;
1834 first_group = pnum * blocks_per_page / 2;
1835 /* release locks on all the groups */
1836 for (i = 0; i < locked_group; i++) {
1837
1838 grp = ext4_get_group_info(sb, first_group + i);
1839 /* take all groups write allocation
1840 * semaphore. This make sure there is
1841 * no block allocation going on in any
1842 * of that groups
1843 */
1844 up_write(&grp->alloc_sem);
1845 }
1846
1847}
1848
1849static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
1850{
1851
1852 int ret;
1853 void *bitmap;
1854 int blocks_per_page;
1855 int block, pnum, poff;
1856 int num_grp_locked = 0;
1857 struct ext4_group_info *this_grp;
1858 struct ext4_sb_info *sbi = EXT4_SB(sb);
1859 struct inode *inode = sbi->s_buddy_cache;
1860 struct page *page = NULL, *bitmap_page = NULL;
1861
1862 mb_debug("init group %lu\n", group);
1863 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1864 this_grp = ext4_get_group_info(sb, group);
1865 /*
1866 * This ensures we don't add group
1867 * to this buddy cache via resize
1868 */
1869 num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
1870 if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
1871 /*
1872 * somebody initialized the group
1873 * return without doing anything
1874 */
1875 ret = 0;
1876 goto err;
1877 }
1878 /*
1879 * the buddy cache inode stores the block bitmap
1880 * and buddy information in consecutive blocks.
1881 * So for each group we need two blocks.
1882 */
1883 block = group * 2;
1884 pnum = block / blocks_per_page;
1885 poff = block % blocks_per_page;
1886 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1887 if (page) {
1888 BUG_ON(page->mapping != inode->i_mapping);
1889 ret = ext4_mb_init_cache(page, NULL);
1890 if (ret) {
1891 unlock_page(page);
1892 goto err;
1893 }
1894 unlock_page(page);
1895 }
1896 if (page == NULL || !PageUptodate(page)) {
1897 ret = -EIO;
1898 goto err;
1899 }
1900 mark_page_accessed(page);
1901 bitmap_page = page;
1902 bitmap = page_address(page) + (poff * sb->s_blocksize);
1903
1904 /* init buddy cache */
1905 block++;
1906 pnum = block / blocks_per_page;
1907 poff = block % blocks_per_page;
1908 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1909 if (page == bitmap_page) {
1910 /*
1911 * If both the bitmap and buddy are in
1912 * the same page we don't need to force
1913 * init the buddy
1914 */
1915 unlock_page(page);
1916 } else if (page) {
1917 BUG_ON(page->mapping != inode->i_mapping);
1918 ret = ext4_mb_init_cache(page, bitmap);
1919 if (ret) {
1920 unlock_page(page);
1921 goto err;
1922 }
1923 unlock_page(page);
1924 }
1925 if (page == NULL || !PageUptodate(page)) {
1926 ret = -EIO;
1927 goto err;
1928 }
1929 mark_page_accessed(page);
1930err:
1931 ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
1932 if (bitmap_page)
1933 page_cache_release(bitmap_page);
1934 if (page)
1935 page_cache_release(page);
1936 return ret;
1937}
1938
1695static noinline_for_stack int 1939static noinline_for_stack int
1696ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1940ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1697{ 1941{
@@ -1775,7 +2019,7 @@ repeat:
1775 group = 0; 2019 group = 0;
1776 2020
1777 /* quick check to skip empty groups */ 2021 /* quick check to skip empty groups */
1778 grp = ext4_get_group_info(ac->ac_sb, group); 2022 grp = ext4_get_group_info(sb, group);
1779 if (grp->bb_free == 0) 2023 if (grp->bb_free == 0)
1780 continue; 2024 continue;
1781 2025
@@ -1788,10 +2032,9 @@ repeat:
1788 * we need full data about the group 2032 * we need full data about the group
1789 * to make a good selection 2033 * to make a good selection
1790 */ 2034 */
1791 err = ext4_mb_load_buddy(sb, group, &e4b); 2035 err = ext4_mb_init_group(sb, group);
1792 if (err) 2036 if (err)
1793 goto out; 2037 goto out;
1794 ext4_mb_release_desc(&e4b);
1795 } 2038 }
1796 2039
1797 /* 2040 /*
@@ -1932,13 +2175,13 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v)
1932 if (hs->op == EXT4_MB_HISTORY_ALLOC) { 2175 if (hs->op == EXT4_MB_HISTORY_ALLOC) {
1933 fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " 2176 fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u "
1934 "%-5u %-5s %-5u %-6u\n"; 2177 "%-5u %-5s %-5u %-6u\n";
1935 sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group, 2178 sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group,
1936 hs->result.fe_start, hs->result.fe_len, 2179 hs->result.fe_start, hs->result.fe_len,
1937 hs->result.fe_logical); 2180 hs->result.fe_logical);
1938 sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group, 2181 sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group,
1939 hs->orig.fe_start, hs->orig.fe_len, 2182 hs->orig.fe_start, hs->orig.fe_len,
1940 hs->orig.fe_logical); 2183 hs->orig.fe_logical);
1941 sprintf(buf3, "%lu/%d/%u@%u", hs->goal.fe_group, 2184 sprintf(buf3, "%u/%d/%u@%u", hs->goal.fe_group,
1942 hs->goal.fe_start, hs->goal.fe_len, 2185 hs->goal.fe_start, hs->goal.fe_len,
1943 hs->goal.fe_logical); 2186 hs->goal.fe_logical);
1944 seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2, 2187 seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2,
@@ -1947,20 +2190,20 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v)
1947 hs->buddy ? 1 << hs->buddy : 0); 2190 hs->buddy ? 1 << hs->buddy : 0);
1948 } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) { 2191 } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) {
1949 fmt = "%-5u %-8u %-23s %-23s %-23s\n"; 2192 fmt = "%-5u %-8u %-23s %-23s %-23s\n";
1950 sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group, 2193 sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group,
1951 hs->result.fe_start, hs->result.fe_len, 2194 hs->result.fe_start, hs->result.fe_len,
1952 hs->result.fe_logical); 2195 hs->result.fe_logical);
1953 sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group, 2196 sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group,
1954 hs->orig.fe_start, hs->orig.fe_len, 2197 hs->orig.fe_start, hs->orig.fe_len,
1955 hs->orig.fe_logical); 2198 hs->orig.fe_logical);
1956 seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2); 2199 seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2);
1957 } else if (hs->op == EXT4_MB_HISTORY_DISCARD) { 2200 } else if (hs->op == EXT4_MB_HISTORY_DISCARD) {
1958 sprintf(buf2, "%lu/%d/%u", hs->result.fe_group, 2201 sprintf(buf2, "%u/%d/%u", hs->result.fe_group,
1959 hs->result.fe_start, hs->result.fe_len); 2202 hs->result.fe_start, hs->result.fe_len);
1960 seq_printf(seq, "%-5u %-8u %-23s discard\n", 2203 seq_printf(seq, "%-5u %-8u %-23s discard\n",
1961 hs->pid, hs->ino, buf2); 2204 hs->pid, hs->ino, buf2);
1962 } else if (hs->op == EXT4_MB_HISTORY_FREE) { 2205 } else if (hs->op == EXT4_MB_HISTORY_FREE) {
1963 sprintf(buf2, "%lu/%d/%u", hs->result.fe_group, 2206 sprintf(buf2, "%u/%d/%u", hs->result.fe_group,
1964 hs->result.fe_start, hs->result.fe_len); 2207 hs->result.fe_start, hs->result.fe_len);
1965 seq_printf(seq, "%-5u %-8u %-23s free\n", 2208 seq_printf(seq, "%-5u %-8u %-23s free\n",
1966 hs->pid, hs->ino, buf2); 2209 hs->pid, hs->ino, buf2);
@@ -2073,7 +2316,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2073 return NULL; 2316 return NULL;
2074 2317
2075 group = *pos + 1; 2318 group = *pos + 1;
2076 return (void *) group; 2319 return (void *) ((unsigned long) group);
2077} 2320}
2078 2321
2079static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) 2322static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -2086,13 +2329,13 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2086 if (*pos < 0 || *pos >= sbi->s_groups_count) 2329 if (*pos < 0 || *pos >= sbi->s_groups_count)
2087 return NULL; 2330 return NULL;
2088 group = *pos + 1; 2331 group = *pos + 1;
2089 return (void *) group;; 2332 return (void *) ((unsigned long) group);
2090} 2333}
2091 2334
2092static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) 2335static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2093{ 2336{
2094 struct super_block *sb = seq->private; 2337 struct super_block *sb = seq->private;
2095 long group = (long) v; 2338 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2096 int i; 2339 int i;
2097 int err; 2340 int err;
2098 struct ext4_buddy e4b; 2341 struct ext4_buddy e4b;
@@ -2114,7 +2357,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2114 sizeof(struct ext4_group_info); 2357 sizeof(struct ext4_group_info);
2115 err = ext4_mb_load_buddy(sb, group, &e4b); 2358 err = ext4_mb_load_buddy(sb, group, &e4b);
2116 if (err) { 2359 if (err) {
2117 seq_printf(seq, "#%-5lu: I/O error\n", group); 2360 seq_printf(seq, "#%-5u: I/O error\n", group);
2118 return 0; 2361 return 0;
2119 } 2362 }
2120 ext4_lock_group(sb, group); 2363 ext4_lock_group(sb, group);
@@ -2122,7 +2365,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2122 ext4_unlock_group(sb, group); 2365 ext4_unlock_group(sb, group);
2123 ext4_mb_release_desc(&e4b); 2366 ext4_mb_release_desc(&e4b);
2124 2367
2125 seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, 2368 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2126 sg.info.bb_fragments, sg.info.bb_first_free); 2369 sg.info.bb_fragments, sg.info.bb_first_free);
2127 for (i = 0; i <= 13; i++) 2370 for (i = 0; i <= 13; i++)
2128 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? 2371 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
@@ -2296,10 +2539,11 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2296 ext4_free_blocks_after_init(sb, group, desc); 2539 ext4_free_blocks_after_init(sb, group, desc);
2297 } else { 2540 } else {
2298 meta_group_info[i]->bb_free = 2541 meta_group_info[i]->bb_free =
2299 le16_to_cpu(desc->bg_free_blocks_count); 2542 ext4_free_blks_count(sb, desc);
2300 } 2543 }
2301 2544
2302 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2545 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2546 init_rwsem(&meta_group_info[i]->alloc_sem);
2303 meta_group_info[i]->bb_free_root.rb_node = NULL;; 2547 meta_group_info[i]->bb_free_root.rb_node = NULL;;
2304 2548
2305#ifdef DOUBLE_CHECK 2549#ifdef DOUBLE_CHECK
@@ -2327,54 +2571,6 @@ exit_meta_group_info:
2327} /* ext4_mb_add_groupinfo */ 2571} /* ext4_mb_add_groupinfo */
2328 2572
2329/* 2573/*
2330 * Add a group to the existing groups.
2331 * This function is used for online resize
2332 */
2333int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
2334 struct ext4_group_desc *desc)
2335{
2336 struct ext4_sb_info *sbi = EXT4_SB(sb);
2337 struct inode *inode = sbi->s_buddy_cache;
2338 int blocks_per_page;
2339 int block;
2340 int pnum;
2341 struct page *page;
2342 int err;
2343
2344 /* Add group based on group descriptor*/
2345 err = ext4_mb_add_groupinfo(sb, group, desc);
2346 if (err)
2347 return err;
2348
2349 /*
2350 * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
2351 * datas) are set not up to date so that they will be re-initilaized
2352 * during the next call to ext4_mb_load_buddy
2353 */
2354
2355 /* Set buddy page as not up to date */
2356 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
2357 block = group * 2;
2358 pnum = block / blocks_per_page;
2359 page = find_get_page(inode->i_mapping, pnum);
2360 if (page != NULL) {
2361 ClearPageUptodate(page);
2362 page_cache_release(page);
2363 }
2364
2365 /* Set bitmap page as not up to date */
2366 block++;
2367 pnum = block / blocks_per_page;
2368 page = find_get_page(inode->i_mapping, pnum);
2369 if (page != NULL) {
2370 ClearPageUptodate(page);
2371 page_cache_release(page);
2372 }
2373
2374 return 0;
2375}
2376
2377/*
2378 * Update an existing group. 2574 * Update an existing group.
2379 * This function is used for online resize 2575 * This function is used for online resize
2380 */ 2576 */
@@ -2457,7 +2653,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2457 desc = ext4_get_group_desc(sb, i, NULL); 2653 desc = ext4_get_group_desc(sb, i, NULL);
2458 if (desc == NULL) { 2654 if (desc == NULL) {
2459 printk(KERN_ERR 2655 printk(KERN_ERR
2460 "EXT4-fs: can't read descriptor %lu\n", i); 2656 "EXT4-fs: can't read descriptor %u\n", i);
2461 goto err_freebuddy; 2657 goto err_freebuddy;
2462 } 2658 }
2463 if (ext4_mb_add_groupinfo(sb, i, desc) != 0) 2659 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
@@ -2493,6 +2689,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2493 if (sbi->s_mb_offsets == NULL) { 2689 if (sbi->s_mb_offsets == NULL) {
2494 return -ENOMEM; 2690 return -ENOMEM;
2495 } 2691 }
2692
2693 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int);
2496 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); 2694 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2497 if (sbi->s_mb_maxs == NULL) { 2695 if (sbi->s_mb_maxs == NULL) {
2498 kfree(sbi->s_mb_maxs); 2696 kfree(sbi->s_mb_maxs);
@@ -2551,7 +2749,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2551 ext4_mb_init_per_dev_proc(sb); 2749 ext4_mb_init_per_dev_proc(sb);
2552 ext4_mb_history_init(sb); 2750 ext4_mb_history_init(sb);
2553 2751
2554 sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2752 if (sbi->s_journal)
2753 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2555 2754
2556 printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); 2755 printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
2557 return 0; 2756 return 0;
@@ -2652,7 +2851,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2652 list_for_each_safe(l, ltmp, &txn->t_private_list) { 2851 list_for_each_safe(l, ltmp, &txn->t_private_list) {
2653 entry = list_entry(l, struct ext4_free_data, list); 2852 entry = list_entry(l, struct ext4_free_data, list);
2654 2853
2655 mb_debug("gonna free %u blocks in group %lu (0x%p):", 2854 mb_debug("gonna free %u blocks in group %u (0x%p):",
2656 entry->count, entry->group, entry); 2855 entry->count, entry->group, entry);
2657 2856
2658 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2857 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
@@ -2679,8 +2878,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2679 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) 2878 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
2680 + entry->start_blk 2879 + entry->start_blk
2681 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 2880 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
2682 trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id, 2881 trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u",
2683 (unsigned long long) discard_block, entry->count); 2882 sb->s_id, (unsigned long long) discard_block,
2883 entry->count);
2684 sb_issue_discard(sb, discard_block, entry->count); 2884 sb_issue_discard(sb, discard_block, entry->count);
2685 2885
2686 kmem_cache_free(ext4_free_ext_cachep, entry); 2886 kmem_cache_free(ext4_free_ext_cachep, entry);
@@ -2791,7 +2991,7 @@ void exit_ext4_mballoc(void)
2791 */ 2991 */
2792static noinline_for_stack int 2992static noinline_for_stack int
2793ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2993ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2794 handle_t *handle, unsigned long reserv_blks) 2994 handle_t *handle, unsigned int reserv_blks)
2795{ 2995{
2796 struct buffer_head *bitmap_bh = NULL; 2996 struct buffer_head *bitmap_bh = NULL;
2797 struct ext4_super_block *es; 2997 struct ext4_super_block *es;
@@ -2824,7 +3024,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2824 if (!gdp) 3024 if (!gdp)
2825 goto out_err; 3025 goto out_err;
2826 3026
2827 ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, 3027 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2828 gdp->bg_free_blocks_count); 3028 gdp->bg_free_blocks_count);
2829 3029
2830 err = ext4_journal_get_write_access(handle, gdp_bh); 3030 err = ext4_journal_get_write_access(handle, gdp_bh);
@@ -2843,8 +3043,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2843 in_range(block + len - 1, ext4_inode_table(sb, gdp), 3043 in_range(block + len - 1, ext4_inode_table(sb, gdp),
2844 EXT4_SB(sb)->s_itb_per_group)) { 3044 EXT4_SB(sb)->s_itb_per_group)) {
2845 ext4_error(sb, __func__, 3045 ext4_error(sb, __func__,
2846 "Allocating block in system zone - block = %llu", 3046 "Allocating block %llu in system zone of %d group\n",
2847 block); 3047 block, ac->ac_b_ex.fe_group);
2848 /* File system mounted not to panic on error 3048 /* File system mounted not to panic on error
2849 * Fix the bitmap and repeat the block allocation 3049 * Fix the bitmap and repeat the block allocation
2850 * We leak some of the blocks here. 3050 * We leak some of the blocks here.
@@ -2852,7 +3052,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2852 mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), 3052 mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
2853 bitmap_bh->b_data, ac->ac_b_ex.fe_start, 3053 bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2854 ac->ac_b_ex.fe_len); 3054 ac->ac_b_ex.fe_len);
2855 err = ext4_journal_dirty_metadata(handle, bitmap_bh); 3055 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2856 if (!err) 3056 if (!err)
2857 err = -EAGAIN; 3057 err = -EAGAIN;
2858 goto out_err; 3058 goto out_err;
@@ -2866,18 +3066,17 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2866 } 3066 }
2867 } 3067 }
2868#endif 3068#endif
2869 mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data,
2870 ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
2871
2872 spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 3069 spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
3070 mb_set_bits(NULL, bitmap_bh->b_data,
3071 ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
2873 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 3072 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2874 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 3073 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2875 gdp->bg_free_blocks_count = 3074 ext4_free_blks_set(sb, gdp,
2876 cpu_to_le16(ext4_free_blocks_after_init(sb, 3075 ext4_free_blocks_after_init(sb,
2877 ac->ac_b_ex.fe_group, 3076 ac->ac_b_ex.fe_group, gdp));
2878 gdp));
2879 } 3077 }
2880 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); 3078 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
3079 ext4_free_blks_set(sb, gdp, len);
2881 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 3080 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
2882 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 3081 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
2883 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 3082 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -2899,10 +3098,10 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2899 spin_unlock(sb_bgl_lock(sbi, flex_group)); 3098 spin_unlock(sb_bgl_lock(sbi, flex_group));
2900 } 3099 }
2901 3100
2902 err = ext4_journal_dirty_metadata(handle, bitmap_bh); 3101 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2903 if (err) 3102 if (err)
2904 goto out_err; 3103 goto out_err;
2905 err = ext4_journal_dirty_metadata(handle, gdp_bh); 3104 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
2906 3105
2907out_err: 3106out_err:
2908 sb->s_dirt = 1; 3107 sb->s_dirt = 1;
@@ -3031,7 +3230,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3031 /* check we don't cross already preallocated blocks */ 3230 /* check we don't cross already preallocated blocks */
3032 rcu_read_lock(); 3231 rcu_read_lock();
3033 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { 3232 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3034 unsigned long pa_end; 3233 ext4_lblk_t pa_end;
3035 3234
3036 if (pa->pa_deleted) 3235 if (pa->pa_deleted)
3037 continue; 3236 continue;
@@ -3075,7 +3274,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3075 /* XXX: extra loop to check we really don't overlap preallocations */ 3274 /* XXX: extra loop to check we really don't overlap preallocations */
3076 rcu_read_lock(); 3275 rcu_read_lock();
3077 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { 3276 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3078 unsigned long pa_end; 3277 ext4_lblk_t pa_end;
3079 spin_lock(&pa->pa_lock); 3278 spin_lock(&pa->pa_lock);
3080 if (pa->pa_deleted == 0) { 3279 if (pa->pa_deleted == 0) {
3081 pa_end = pa->pa_lstart + pa->pa_len; 3280 pa_end = pa->pa_lstart + pa->pa_len;
@@ -3307,6 +3506,32 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3307} 3506}
3308 3507
3309/* 3508/*
3509 * the function goes through all block freed in the group
3510 * but not yet committed and marks them used in in-core bitmap.
3511 * buddy must be generated from this bitmap
3512 * Need to be called with ext4 group lock (ext4_lock_group)
3513 */
3514static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3515 ext4_group_t group)
3516{
3517 struct rb_node *n;
3518 struct ext4_group_info *grp;
3519 struct ext4_free_data *entry;
3520
3521 grp = ext4_get_group_info(sb, group);
3522 n = rb_first(&(grp->bb_free_root));
3523
3524 while (n) {
3525 entry = rb_entry(n, struct ext4_free_data, node);
3526 mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
3527 bitmap, entry->start_blk,
3528 entry->count);
3529 n = rb_next(n);
3530 }
3531 return;
3532}
3533
3534/*
3310 * the function goes through all preallocation in this group and marks them 3535 * the function goes through all preallocation in this group and marks them
3311 * used in in-core bitmap. buddy must be generated from this bitmap 3536 * used in in-core bitmap. buddy must be generated from this bitmap
3312 * Need to be called with ext4 group lock (ext4_lock_group) 3537 * Need to be called with ext4 group lock (ext4_lock_group)
@@ -3346,7 +3571,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3346 preallocated += len; 3571 preallocated += len;
3347 count++; 3572 count++;
3348 } 3573 }
3349 mb_debug("prellocated %u for group %lu\n", preallocated, group); 3574 mb_debug("prellocated %u for group %u\n", preallocated, group);
3350} 3575}
3351 3576
3352static void ext4_mb_pa_callback(struct rcu_head *head) 3577static void ext4_mb_pa_callback(struct rcu_head *head)
@@ -3363,7 +3588,7 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
3363static void ext4_mb_put_pa(struct ext4_allocation_context *ac, 3588static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3364 struct super_block *sb, struct ext4_prealloc_space *pa) 3589 struct super_block *sb, struct ext4_prealloc_space *pa)
3365{ 3590{
3366 unsigned long grp; 3591 ext4_group_t grp;
3367 3592
3368 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) 3593 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
3369 return; 3594 return;
@@ -3473,6 +3698,10 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3473 3698
3474 mb_debug("new inode pa %p: %llu/%u for %u\n", pa, 3699 mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
3475 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3700 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3701 trace_mark(ext4_mb_new_inode_pa,
3702 "dev %s ino %lu pstart %llu len %u lstart %u",
3703 sb->s_id, ac->ac_inode->i_ino,
3704 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3476 3705
3477 ext4_mb_use_inode_pa(ac, pa); 3706 ext4_mb_use_inode_pa(ac, pa);
3478 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3707 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3530,7 +3759,9 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3530 pa->pa_linear = 1; 3759 pa->pa_linear = 1;
3531 3760
3532 mb_debug("new group pa %p: %llu/%u for %u\n", pa, 3761 mb_debug("new group pa %p: %llu/%u for %u\n", pa,
3533 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3762 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3763 trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u",
3764 sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3534 3765
3535 ext4_mb_use_group_pa(ac, pa); 3766 ext4_mb_use_group_pa(ac, pa);
3536 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3767 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3579,16 +3810,18 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3579{ 3810{
3580 struct super_block *sb = e4b->bd_sb; 3811 struct super_block *sb = e4b->bd_sb;
3581 struct ext4_sb_info *sbi = EXT4_SB(sb); 3812 struct ext4_sb_info *sbi = EXT4_SB(sb);
3582 unsigned long end; 3813 unsigned int end;
3583 unsigned long next; 3814 unsigned int next;
3584 ext4_group_t group; 3815 ext4_group_t group;
3585 ext4_grpblk_t bit; 3816 ext4_grpblk_t bit;
3817 unsigned long long grp_blk_start;
3586 sector_t start; 3818 sector_t start;
3587 int err = 0; 3819 int err = 0;
3588 int free = 0; 3820 int free = 0;
3589 3821
3590 BUG_ON(pa->pa_deleted == 0); 3822 BUG_ON(pa->pa_deleted == 0);
3591 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3823 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3824 grp_blk_start = pa->pa_pstart - bit;
3592 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3825 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3593 end = bit + pa->pa_len; 3826 end = bit + pa->pa_len;
3594 3827
@@ -3618,6 +3851,10 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3618 ext4_mb_store_history(ac); 3851 ext4_mb_store_history(ac);
3619 } 3852 }
3620 3853
3854 trace_mark(ext4_mb_release_inode_pa,
3855 "dev %s ino %lu block %llu count %u",
3856 sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit,
3857 next - bit);
3621 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3858 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3622 bit = next + 1; 3859 bit = next + 1;
3623 } 3860 }
@@ -3626,8 +3863,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3626 pa, (unsigned long) pa->pa_lstart, 3863 pa, (unsigned long) pa->pa_lstart,
3627 (unsigned long) pa->pa_pstart, 3864 (unsigned long) pa->pa_pstart,
3628 (unsigned long) pa->pa_len); 3865 (unsigned long) pa->pa_len);
3629 ext4_error(sb, __func__, "free %u, pa_free %u\n", 3866 ext4_grp_locked_error(sb, group,
3630 free, pa->pa_free); 3867 __func__, "free %u, pa_free %u",
3868 free, pa->pa_free);
3631 /* 3869 /*
3632 * pa is already deleted so we use the value obtained 3870 * pa is already deleted so we use the value obtained
3633 * from the bitmap and continue. 3871 * from the bitmap and continue.
@@ -3650,6 +3888,8 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3650 if (ac) 3888 if (ac)
3651 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3889 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3652 3890
3891 trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d",
3892 sb->s_id, pa->pa_pstart, pa->pa_len);
3653 BUG_ON(pa->pa_deleted == 0); 3893 BUG_ON(pa->pa_deleted == 0);
3654 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3894 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3655 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3895 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -3692,7 +3932,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3692 int busy = 0; 3932 int busy = 0;
3693 int free = 0; 3933 int free = 0;
3694 3934
3695 mb_debug("discard preallocation for group %lu\n", group); 3935 mb_debug("discard preallocation for group %u\n", group);
3696 3936
3697 if (list_empty(&grp->bb_prealloc_list)) 3937 if (list_empty(&grp->bb_prealloc_list))
3698 return 0; 3938 return 0;
@@ -3700,14 +3940,14 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3700 bitmap_bh = ext4_read_block_bitmap(sb, group); 3940 bitmap_bh = ext4_read_block_bitmap(sb, group);
3701 if (bitmap_bh == NULL) { 3941 if (bitmap_bh == NULL) {
3702 ext4_error(sb, __func__, "Error in reading block " 3942 ext4_error(sb, __func__, "Error in reading block "
3703 "bitmap for %lu\n", group); 3943 "bitmap for %u", group);
3704 return 0; 3944 return 0;
3705 } 3945 }
3706 3946
3707 err = ext4_mb_load_buddy(sb, group, &e4b); 3947 err = ext4_mb_load_buddy(sb, group, &e4b);
3708 if (err) { 3948 if (err) {
3709 ext4_error(sb, __func__, "Error in loading buddy " 3949 ext4_error(sb, __func__, "Error in loading buddy "
3710 "information for %lu\n", group); 3950 "information for %u", group);
3711 put_bh(bitmap_bh); 3951 put_bh(bitmap_bh);
3712 return 0; 3952 return 0;
3713 } 3953 }
@@ -3815,6 +4055,8 @@ void ext4_discard_preallocations(struct inode *inode)
3815 } 4055 }
3816 4056
3817 mb_debug("discard preallocation for inode %lu\n", inode->i_ino); 4057 mb_debug("discard preallocation for inode %lu\n", inode->i_ino);
4058 trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id,
4059 inode->i_ino);
3818 4060
3819 INIT_LIST_HEAD(&list); 4061 INIT_LIST_HEAD(&list);
3820 4062
@@ -3874,14 +4116,14 @@ repeat:
3874 err = ext4_mb_load_buddy(sb, group, &e4b); 4116 err = ext4_mb_load_buddy(sb, group, &e4b);
3875 if (err) { 4117 if (err) {
3876 ext4_error(sb, __func__, "Error in loading buddy " 4118 ext4_error(sb, __func__, "Error in loading buddy "
3877 "information for %lu\n", group); 4119 "information for %u", group);
3878 continue; 4120 continue;
3879 } 4121 }
3880 4122
3881 bitmap_bh = ext4_read_block_bitmap(sb, group); 4123 bitmap_bh = ext4_read_block_bitmap(sb, group);
3882 if (bitmap_bh == NULL) { 4124 if (bitmap_bh == NULL) {
3883 ext4_error(sb, __func__, "Error in reading block " 4125 ext4_error(sb, __func__, "Error in reading block "
3884 "bitmap for %lu\n", group); 4126 "bitmap for %u", group);
3885 ext4_mb_release_desc(&e4b); 4127 ext4_mb_release_desc(&e4b);
3886 continue; 4128 continue;
3887 } 4129 }
@@ -4024,8 +4266,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4024 struct ext4_sb_info *sbi = EXT4_SB(sb); 4266 struct ext4_sb_info *sbi = EXT4_SB(sb);
4025 struct ext4_super_block *es = sbi->s_es; 4267 struct ext4_super_block *es = sbi->s_es;
4026 ext4_group_t group; 4268 ext4_group_t group;
4027 unsigned long len; 4269 unsigned int len;
4028 unsigned long goal; 4270 ext4_fsblk_t goal;
4029 ext4_grpblk_t block; 4271 ext4_grpblk_t block;
4030 4272
4031 /* we can't allocate > group size */ 4273 /* we can't allocate > group size */
@@ -4068,6 +4310,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4068 ac->ac_pa = NULL; 4310 ac->ac_pa = NULL;
4069 ac->ac_bitmap_page = NULL; 4311 ac->ac_bitmap_page = NULL;
4070 ac->ac_buddy_page = NULL; 4312 ac->ac_buddy_page = NULL;
4313 ac->alloc_semp = NULL;
4071 ac->ac_lg = NULL; 4314 ac->ac_lg = NULL;
4072 4315
4073 /* we have to define context: we'll we work with a file or 4316 /* we have to define context: we'll we work with a file or
@@ -4146,7 +4389,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4146 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); 4389 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4147 if (ext4_mb_load_buddy(sb, group, &e4b)) { 4390 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4148 ext4_error(sb, __func__, "Error in loading buddy " 4391 ext4_error(sb, __func__, "Error in loading buddy "
4149 "information for %lu\n", group); 4392 "information for %u", group);
4150 continue; 4393 continue;
4151 } 4394 }
4152 ext4_lock_group(sb, group); 4395 ext4_lock_group(sb, group);
@@ -4248,6 +4491,8 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4248 } 4491 }
4249 ext4_mb_put_pa(ac, ac->ac_sb, pa); 4492 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4250 } 4493 }
4494 if (ac->alloc_semp)
4495 up_read(ac->alloc_semp);
4251 if (ac->ac_bitmap_page) 4496 if (ac->ac_bitmap_page)
4252 page_cache_release(ac->ac_bitmap_page); 4497 page_cache_release(ac->ac_bitmap_page);
4253 if (ac->ac_buddy_page) 4498 if (ac->ac_buddy_page)
@@ -4264,6 +4509,8 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4264 int ret; 4509 int ret;
4265 int freed = 0; 4510 int freed = 0;
4266 4511
4512 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
4513 sb->s_id, needed);
4267 for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { 4514 for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) {
4268 ret = ext4_mb_discard_group_preallocations(sb, i, needed); 4515 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4269 freed += ret; 4516 freed += ret;
@@ -4286,12 +4533,24 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4286 struct ext4_sb_info *sbi; 4533 struct ext4_sb_info *sbi;
4287 struct super_block *sb; 4534 struct super_block *sb;
4288 ext4_fsblk_t block = 0; 4535 ext4_fsblk_t block = 0;
4289 unsigned long inquota; 4536 unsigned int inquota;
4290 unsigned long reserv_blks = 0; 4537 unsigned int reserv_blks = 0;
4291 4538
4292 sb = ar->inode->i_sb; 4539 sb = ar->inode->i_sb;
4293 sbi = EXT4_SB(sb); 4540 sbi = EXT4_SB(sb);
4294 4541
4542 trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu "
4543 "lblk %llu goal %llu lleft %llu lright %llu "
4544 "pleft %llu pright %llu ",
4545 sb->s_id, ar->flags, ar->len,
4546 ar->inode ? ar->inode->i_ino : 0,
4547 (unsigned long long) ar->logical,
4548 (unsigned long long) ar->goal,
4549 (unsigned long long) ar->lleft,
4550 (unsigned long long) ar->lright,
4551 (unsigned long long) ar->pleft,
4552 (unsigned long long) ar->pright);
4553
4295 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { 4554 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
4296 /* 4555 /*
4297 * With delalloc we already reserved the blocks 4556 * With delalloc we already reserved the blocks
@@ -4313,7 +4572,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4313 } 4572 }
4314 if (ar->len == 0) { 4573 if (ar->len == 0) {
4315 *errp = -EDQUOT; 4574 *errp = -EDQUOT;
4316 return 0; 4575 goto out3;
4317 } 4576 }
4318 inquota = ar->len; 4577 inquota = ar->len;
4319 4578
@@ -4348,10 +4607,14 @@ repeat:
4348 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) 4607 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4349 ext4_mb_new_preallocation(ac); 4608 ext4_mb_new_preallocation(ac);
4350 } 4609 }
4351
4352 if (likely(ac->ac_status == AC_STATUS_FOUND)) { 4610 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4353 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); 4611 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
4354 if (*errp == -EAGAIN) { 4612 if (*errp == -EAGAIN) {
4613 /*
4614 * drop the reference that we took
4615 * in ext4_mb_use_best_found
4616 */
4617 ext4_mb_release_context(ac);
4355 ac->ac_b_ex.fe_group = 0; 4618 ac->ac_b_ex.fe_group = 0;
4356 ac->ac_b_ex.fe_start = 0; 4619 ac->ac_b_ex.fe_start = 0;
4357 ac->ac_b_ex.fe_len = 0; 4620 ac->ac_b_ex.fe_len = 0;
@@ -4382,6 +4645,26 @@ out2:
4382out1: 4645out1:
4383 if (ar->len < inquota) 4646 if (ar->len < inquota)
4384 DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); 4647 DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
4648out3:
4649 if (!ar->len) {
4650 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
4651 /* release all the reserved blocks if non delalloc */
4652 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
4653 reserv_blks);
4654 }
4655
4656 trace_mark(ext4_allocate_blocks,
4657 "dev %s block %llu flags %u len %u ino %lu "
4658 "logical %llu goal %llu lleft %llu lright %llu "
4659 "pleft %llu pright %llu ",
4660 sb->s_id, (unsigned long long) block,
4661 ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0,
4662 (unsigned long long) ar->logical,
4663 (unsigned long long) ar->goal,
4664 (unsigned long long) ar->lleft,
4665 (unsigned long long) ar->lright,
4666 (unsigned long long) ar->pleft,
4667 (unsigned long long) ar->pright);
4385 4668
4386 return block; 4669 return block;
4387} 4670}
@@ -4403,27 +4686,23 @@ static int can_merge(struct ext4_free_data *entry1,
4403 4686
4404static noinline_for_stack int 4687static noinline_for_stack int
4405ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, 4688ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4406 ext4_group_t group, ext4_grpblk_t block, int count) 4689 struct ext4_free_data *new_entry)
4407{ 4690{
4691 ext4_grpblk_t block;
4692 struct ext4_free_data *entry;
4408 struct ext4_group_info *db = e4b->bd_info; 4693 struct ext4_group_info *db = e4b->bd_info;
4409 struct super_block *sb = e4b->bd_sb; 4694 struct super_block *sb = e4b->bd_sb;
4410 struct ext4_sb_info *sbi = EXT4_SB(sb); 4695 struct ext4_sb_info *sbi = EXT4_SB(sb);
4411 struct ext4_free_data *entry, *new_entry;
4412 struct rb_node **n = &db->bb_free_root.rb_node, *node; 4696 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4413 struct rb_node *parent = NULL, *new_node; 4697 struct rb_node *parent = NULL, *new_node;
4414 4698
4415 4699 BUG_ON(!ext4_handle_valid(handle));
4416 BUG_ON(e4b->bd_bitmap_page == NULL); 4700 BUG_ON(e4b->bd_bitmap_page == NULL);
4417 BUG_ON(e4b->bd_buddy_page == NULL); 4701 BUG_ON(e4b->bd_buddy_page == NULL);
4418 4702
4419 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4420 new_entry->start_blk = block;
4421 new_entry->group = group;
4422 new_entry->count = count;
4423 new_entry->t_tid = handle->h_transaction->t_tid;
4424 new_node = &new_entry->node; 4703 new_node = &new_entry->node;
4704 block = new_entry->start_blk;
4425 4705
4426 ext4_lock_group(sb, group);
4427 if (!*n) { 4706 if (!*n) {
4428 /* first free block exent. We need to 4707 /* first free block exent. We need to
4429 protect buddy cache from being freed, 4708 protect buddy cache from being freed,
@@ -4441,10 +4720,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4441 else if (block >= (entry->start_blk + entry->count)) 4720 else if (block >= (entry->start_blk + entry->count))
4442 n = &(*n)->rb_right; 4721 n = &(*n)->rb_right;
4443 else { 4722 else {
4444 ext4_unlock_group(sb, group); 4723 ext4_grp_locked_error(sb, e4b->bd_group, __func__,
4445 ext4_error(sb, __func__, 4724 "Double free of blocks %d (%d %d)",
4446 "Double free of blocks %d (%d %d)\n", 4725 block, entry->start_blk, entry->count);
4447 block, entry->start_blk, entry->count);
4448 return 0; 4726 return 0;
4449 } 4727 }
4450 } 4728 }
@@ -4483,7 +4761,6 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4483 spin_lock(&sbi->s_md_lock); 4761 spin_lock(&sbi->s_md_lock);
4484 list_add(&new_entry->list, &handle->h_transaction->t_private_list); 4762 list_add(&new_entry->list, &handle->h_transaction->t_private_list);
4485 spin_unlock(&sbi->s_md_lock); 4763 spin_unlock(&sbi->s_md_lock);
4486 ext4_unlock_group(sb, group);
4487 return 0; 4764 return 0;
4488} 4765}
4489 4766
@@ -4499,7 +4776,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4499 struct ext4_allocation_context *ac = NULL; 4776 struct ext4_allocation_context *ac = NULL;
4500 struct ext4_group_desc *gdp; 4777 struct ext4_group_desc *gdp;
4501 struct ext4_super_block *es; 4778 struct ext4_super_block *es;
4502 unsigned long overflow; 4779 unsigned int overflow;
4503 ext4_grpblk_t bit; 4780 ext4_grpblk_t bit;
4504 struct buffer_head *gd_bh; 4781 struct buffer_head *gd_bh;
4505 ext4_group_t block_group; 4782 ext4_group_t block_group;
@@ -4522,6 +4799,10 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4522 } 4799 }
4523 4800
4524 ext4_debug("freeing block %lu\n", block); 4801 ext4_debug("freeing block %lu\n", block);
4802 trace_mark(ext4_free_blocks,
4803 "dev %s block %llu count %lu metadata %d ino %lu",
4804 sb->s_id, (unsigned long long) block, count, metadata,
4805 inode ? inode->i_ino : 0);
4525 4806
4526 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4807 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4527 if (ac) { 4808 if (ac) {
@@ -4581,11 +4862,6 @@ do_more:
4581 err = ext4_journal_get_write_access(handle, gd_bh); 4862 err = ext4_journal_get_write_access(handle, gd_bh);
4582 if (err) 4863 if (err)
4583 goto error_return; 4864 goto error_return;
4584
4585 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4586 if (err)
4587 goto error_return;
4588
4589#ifdef AGGRESSIVE_CHECK 4865#ifdef AGGRESSIVE_CHECK
4590 { 4866 {
4591 int i; 4867 int i;
@@ -4593,13 +4869,6 @@ do_more:
4593 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); 4869 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4594 } 4870 }
4595#endif 4871#endif
4596 mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
4597 bit, count);
4598
4599 /* We dirtied the bitmap block */
4600 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4601 err = ext4_journal_dirty_metadata(handle, bitmap_bh);
4602
4603 if (ac) { 4872 if (ac) {
4604 ac->ac_b_ex.fe_group = block_group; 4873 ac->ac_b_ex.fe_group = block_group;
4605 ac->ac_b_ex.fe_start = bit; 4874 ac->ac_b_ex.fe_start = bit;
@@ -4607,19 +4876,41 @@ do_more:
4607 ext4_mb_store_history(ac); 4876 ext4_mb_store_history(ac);
4608 } 4877 }
4609 4878
4610 if (metadata) { 4879 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4611 /* blocks being freed are metadata. these blocks shouldn't 4880 if (err)
4612 * be used until this transaction is committed */ 4881 goto error_return;
4613 ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); 4882 if (metadata && ext4_handle_valid(handle)) {
4883 struct ext4_free_data *new_entry;
4884 /*
4885 * blocks being freed are metadata. these blocks shouldn't
4886 * be used until this transaction is committed
4887 */
4888 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4889 new_entry->start_blk = bit;
4890 new_entry->group = block_group;
4891 new_entry->count = count;
4892 new_entry->t_tid = handle->h_transaction->t_tid;
4893 ext4_lock_group(sb, block_group);
4894 mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
4895 bit, count);
4896 ext4_mb_free_metadata(handle, &e4b, new_entry);
4897 ext4_unlock_group(sb, block_group);
4614 } else { 4898 } else {
4615 ext4_lock_group(sb, block_group); 4899 ext4_lock_group(sb, block_group);
4900 /* need to update group_info->bb_free and bitmap
4901 * with group lock held. generate_buddy look at
4902 * them with group lock_held
4903 */
4904 mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
4905 bit, count);
4616 mb_free_blocks(inode, &e4b, bit, count); 4906 mb_free_blocks(inode, &e4b, bit, count);
4617 ext4_mb_return_to_preallocation(inode, &e4b, block, count); 4907 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4618 ext4_unlock_group(sb, block_group); 4908 ext4_unlock_group(sb, block_group);
4619 } 4909 }
4620 4910
4621 spin_lock(sb_bgl_lock(sbi, block_group)); 4911 spin_lock(sb_bgl_lock(sbi, block_group));
4622 le16_add_cpu(&gdp->bg_free_blocks_count, count); 4912 ret = ext4_free_blks_count(sb, gdp) + count;
4913 ext4_free_blks_set(sb, gdp, ret);
4623 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4914 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4624 spin_unlock(sb_bgl_lock(sbi, block_group)); 4915 spin_unlock(sb_bgl_lock(sbi, block_group));
4625 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4916 percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -4635,9 +4926,13 @@ do_more:
4635 4926
4636 *freed += count; 4927 *freed += count;
4637 4928
4929 /* We dirtied the bitmap block */
4930 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4931 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4932
4638 /* And the group descriptor block */ 4933 /* And the group descriptor block */
4639 BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); 4934 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4640 ret = ext4_journal_dirty_metadata(handle, gd_bh); 4935 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4641 if (!err) 4936 if (!err)
4642 err = ret; 4937 err = ret;
4643 4938
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index b5dff1fff1e5..10a2921baf14 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -20,6 +20,7 @@
20#include <linux/version.h> 20#include <linux/version.h>
21#include <linux/blkdev.h> 21#include <linux/blkdev.h>
22#include <linux/marker.h> 22#include <linux/marker.h>
23#include <linux/mutex.h>
23#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
24#include "ext4.h" 25#include "ext4.h"
25#include "group.h" 26#include "group.h"
@@ -98,9 +99,6 @@
98 */ 99 */
99#define MB_DEFAULT_GROUP_PREALLOC 512 100#define MB_DEFAULT_GROUP_PREALLOC 512
100 101
101static struct kmem_cache *ext4_pspace_cachep;
102static struct kmem_cache *ext4_ac_cachep;
103static struct kmem_cache *ext4_free_ext_cachep;
104 102
105struct ext4_free_data { 103struct ext4_free_data {
106 /* this links the free block information from group_info */ 104 /* this links the free block information from group_info */
@@ -120,26 +118,6 @@ struct ext4_free_data {
120 tid_t t_tid; 118 tid_t t_tid;
121}; 119};
122 120
123struct ext4_group_info {
124 unsigned long bb_state;
125 struct rb_root bb_free_root;
126 unsigned short bb_first_free;
127 unsigned short bb_free;
128 unsigned short bb_fragments;
129 struct list_head bb_prealloc_list;
130#ifdef DOUBLE_CHECK
131 void *bb_bitmap;
132#endif
133 unsigned short bb_counters[];
134};
135
136#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
137#define EXT4_GROUP_INFO_LOCKED_BIT 1
138
139#define EXT4_MB_GRP_NEED_INIT(grp) \
140 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
141
142
143struct ext4_prealloc_space { 121struct ext4_prealloc_space {
144 struct list_head pa_inode_list; 122 struct list_head pa_inode_list;
145 struct list_head pa_group_list; 123 struct list_head pa_group_list;
@@ -217,6 +195,11 @@ struct ext4_allocation_context {
217 __u8 ac_op; /* operation, for history only */ 195 __u8 ac_op; /* operation, for history only */
218 struct page *ac_bitmap_page; 196 struct page *ac_bitmap_page;
219 struct page *ac_buddy_page; 197 struct page *ac_buddy_page;
198 /*
199 * pointer to the held semaphore upon successful
200 * block allocation
201 */
202 struct rw_semaphore *alloc_semp;
220 struct ext4_prealloc_space *ac_pa; 203 struct ext4_prealloc_space *ac_pa;
221 struct ext4_locality_group *ac_lg; 204 struct ext4_locality_group *ac_lg;
222}; 205};
@@ -250,6 +233,7 @@ struct ext4_buddy {
250 struct super_block *bd_sb; 233 struct super_block *bd_sb;
251 __u16 bd_blkbits; 234 __u16 bd_blkbits;
252 ext4_group_t bd_group; 235 ext4_group_t bd_group;
236 struct rw_semaphore *alloc_semp;
253}; 237};
254#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) 238#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
255#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) 239#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
@@ -259,51 +243,12 @@ static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
259{ 243{
260 return; 244 return;
261} 245}
262#else
263static void ext4_mb_store_history(struct ext4_allocation_context *ac);
264#endif 246#endif
265 247
266#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 248#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
267 249
268struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); 250struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
269 251static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
270static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
271 ext4_group_t group);
272static void ext4_mb_return_to_preallocation(struct inode *inode,
273 struct ext4_buddy *e4b, sector_t block,
274 int count);
275static void ext4_mb_put_pa(struct ext4_allocation_context *,
276 struct super_block *, struct ext4_prealloc_space *pa);
277static int ext4_mb_init_per_dev_proc(struct super_block *sb);
278static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
279static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
280
281
282static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
283{
284 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
285
286 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
287}
288
289static inline void ext4_unlock_group(struct super_block *sb,
290 ext4_group_t group)
291{
292 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
293
294 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
295}
296
297static inline int ext4_is_group_locked(struct super_block *sb,
298 ext4_group_t group)
299{
300 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
301
302 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
303 &(grinfo->bb_state));
304}
305
306static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
307 struct ext4_free_extent *fex) 252 struct ext4_free_extent *fex)
308{ 253{
309 ext4_fsblk_t block; 254 ext4_fsblk_t block;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index f2a9cf498ecd..734abca25e35 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -59,7 +59,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
59 /* 59 /*
60 * Make sure the credit we accumalated is not really high 60 * Make sure the credit we accumalated is not really high
61 */ 61 */
62 if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) { 62 if (needed && ext4_handle_has_enough_credits(handle,
63 EXT4_RESERVE_TRANS_BLOCKS)) {
63 retval = ext4_journal_restart(handle, needed); 64 retval = ext4_journal_restart(handle, needed);
64 if (retval) 65 if (retval)
65 goto err_out; 66 goto err_out;
@@ -229,7 +230,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
229{ 230{
230 int retval = 0, needed; 231 int retval = 0, needed;
231 232
232 if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) 233 if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
233 return 0; 234 return 0;
234 /* 235 /*
235 * We are freeing a blocks. During this we touch 236 * We are freeing a blocks. During this we touch
@@ -458,13 +459,13 @@ int ext4_ext_migrate(struct inode *inode)
458 struct list_blocks_struct lb; 459 struct list_blocks_struct lb;
459 unsigned long max_entries; 460 unsigned long max_entries;
460 461
461 if (!test_opt(inode->i_sb, EXTENTS)) 462 /*
462 /* 463 * If the filesystem does not support extents, or the inode
463 * if mounted with noextents we don't allow the migrate 464 * already is extent-based, error out.
464 */ 465 */
465 return -EINVAL; 466 if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
466 467 EXT4_FEATURE_INCOMPAT_EXTENTS) ||
467 if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 468 (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
468 return -EINVAL; 469 return -EINVAL;
469 470
470 if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) 471 if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 63adcb792988..fec0b4c2f5f1 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -74,10 +74,6 @@ static struct buffer_head *ext4_append(handle_t *handle,
74#define assert(test) J_ASSERT(test) 74#define assert(test) J_ASSERT(test)
75#endif 75#endif
76 76
77#ifndef swap
78#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
79#endif
80
81#ifdef DX_DEBUG 77#ifdef DX_DEBUG
82#define dxtrace(command) command 78#define dxtrace(command) command
83#else 79#else
@@ -372,6 +368,8 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
372 goto fail; 368 goto fail;
373 } 369 }
374 hinfo->hash_version = root->info.hash_version; 370 hinfo->hash_version = root->info.hash_version;
371 if (hinfo->hash_version <= DX_HASH_TEA)
372 hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
375 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; 373 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
376 if (d_name) 374 if (d_name)
377 ext4fs_dirhash(d_name->name, d_name->len, hinfo); 375 ext4fs_dirhash(d_name->name, d_name->len, hinfo);
@@ -641,6 +639,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
641 dir = dir_file->f_path.dentry->d_inode; 639 dir = dir_file->f_path.dentry->d_inode;
642 if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { 640 if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
643 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; 641 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
642 if (hinfo.hash_version <= DX_HASH_TEA)
643 hinfo.hash_version +=
644 EXT4_SB(dir->i_sb)->s_hash_unsigned;
644 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; 645 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
645 count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, 646 count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
646 start_hash, start_minor_hash); 647 start_hash, start_minor_hash);
@@ -806,7 +807,7 @@ static inline int ext4_match (int len, const char * const name,
806static inline int search_dirblock(struct buffer_head *bh, 807static inline int search_dirblock(struct buffer_head *bh,
807 struct inode *dir, 808 struct inode *dir,
808 const struct qstr *d_name, 809 const struct qstr *d_name,
809 unsigned long offset, 810 unsigned int offset,
810 struct ext4_dir_entry_2 ** res_dir) 811 struct ext4_dir_entry_2 ** res_dir)
811{ 812{
812 struct ext4_dir_entry_2 * de; 813 struct ext4_dir_entry_2 * de;
@@ -1043,11 +1044,11 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1043 bh = ext4_find_entry(dir, &dentry->d_name, &de); 1044 bh = ext4_find_entry(dir, &dentry->d_name, &de);
1044 inode = NULL; 1045 inode = NULL;
1045 if (bh) { 1046 if (bh) {
1046 unsigned long ino = le32_to_cpu(de->inode); 1047 __u32 ino = le32_to_cpu(de->inode);
1047 brelse(bh); 1048 brelse(bh);
1048 if (!ext4_valid_inum(dir->i_sb, ino)) { 1049 if (!ext4_valid_inum(dir->i_sb, ino)) {
1049 ext4_error(dir->i_sb, "ext4_lookup", 1050 ext4_error(dir->i_sb, "ext4_lookup",
1050 "bad inode number: %lu", ino); 1051 "bad inode number: %u", ino);
1051 return ERR_PTR(-EIO); 1052 return ERR_PTR(-EIO);
1052 } 1053 }
1053 inode = ext4_iget(dir->i_sb, ino); 1054 inode = ext4_iget(dir->i_sb, ino);
@@ -1060,7 +1061,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1060 1061
1061struct dentry *ext4_get_parent(struct dentry *child) 1062struct dentry *ext4_get_parent(struct dentry *child)
1062{ 1063{
1063 unsigned long ino; 1064 __u32 ino;
1064 struct inode *inode; 1065 struct inode *inode;
1065 static const struct qstr dotdot = { 1066 static const struct qstr dotdot = {
1066 .name = "..", 1067 .name = "..",
@@ -1078,7 +1079,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
1078 1079
1079 if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { 1080 if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
1080 ext4_error(child->d_inode->i_sb, "ext4_get_parent", 1081 ext4_error(child->d_inode->i_sb, "ext4_get_parent",
1081 "bad inode number: %lu", ino); 1082 "bad inode number: %u", ino);
1082 return ERR_PTR(-EIO); 1083 return ERR_PTR(-EIO);
1083 } 1084 }
1084 1085
@@ -1166,9 +1167,9 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1166 u32 hash2; 1167 u32 hash2;
1167 struct dx_map_entry *map; 1168 struct dx_map_entry *map;
1168 char *data1 = (*bh)->b_data, *data2; 1169 char *data1 = (*bh)->b_data, *data2;
1169 unsigned split, move, size, i; 1170 unsigned split, move, size;
1170 struct ext4_dir_entry_2 *de = NULL, *de2; 1171 struct ext4_dir_entry_2 *de = NULL, *de2;
1171 int err = 0; 1172 int err = 0, i;
1172 1173
1173 bh2 = ext4_append (handle, dir, &newblock, &err); 1174 bh2 = ext4_append (handle, dir, &newblock, &err);
1174 if (!(bh2)) { 1175 if (!(bh2)) {
@@ -1228,10 +1229,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1228 de = de2; 1229 de = de2;
1229 } 1230 }
1230 dx_insert_block(frame, hash2 + continued, newblock); 1231 dx_insert_block(frame, hash2 + continued, newblock);
1231 err = ext4_journal_dirty_metadata(handle, bh2); 1232 err = ext4_handle_dirty_metadata(handle, dir, bh2);
1232 if (err) 1233 if (err)
1233 goto journal_error; 1234 goto journal_error;
1234 err = ext4_journal_dirty_metadata(handle, frame->bh); 1235 err = ext4_handle_dirty_metadata(handle, dir, frame->bh);
1235 if (err) 1236 if (err)
1236 goto journal_error; 1237 goto journal_error;
1237 brelse(bh2); 1238 brelse(bh2);
@@ -1266,7 +1267,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1266 struct inode *dir = dentry->d_parent->d_inode; 1267 struct inode *dir = dentry->d_parent->d_inode;
1267 const char *name = dentry->d_name.name; 1268 const char *name = dentry->d_name.name;
1268 int namelen = dentry->d_name.len; 1269 int namelen = dentry->d_name.len;
1269 unsigned long offset = 0; 1270 unsigned int offset = 0;
1270 unsigned short reclen; 1271 unsigned short reclen;
1271 int nlen, rlen, err; 1272 int nlen, rlen, err;
1272 char *top; 1273 char *top;
@@ -1335,8 +1336,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1335 ext4_update_dx_flag(dir); 1336 ext4_update_dx_flag(dir);
1336 dir->i_version++; 1337 dir->i_version++;
1337 ext4_mark_inode_dirty(handle, dir); 1338 ext4_mark_inode_dirty(handle, dir);
1338 BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); 1339 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1339 err = ext4_journal_dirty_metadata(handle, bh); 1340 err = ext4_handle_dirty_metadata(handle, dir, bh);
1340 if (err) 1341 if (err)
1341 ext4_std_error(dir->i_sb, err); 1342 ext4_std_error(dir->i_sb, err);
1342 brelse(bh); 1343 brelse(bh);
@@ -1408,6 +1409,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1408 1409
1409 /* Initialize as for dx_probe */ 1410 /* Initialize as for dx_probe */
1410 hinfo.hash_version = root->info.hash_version; 1411 hinfo.hash_version = root->info.hash_version;
1412 if (hinfo.hash_version <= DX_HASH_TEA)
1413 hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
1411 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; 1414 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1412 ext4fs_dirhash(name, namelen, &hinfo); 1415 ext4fs_dirhash(name, namelen, &hinfo);
1413 frame = frames; 1416 frame = frames;
@@ -1437,7 +1440,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1437 struct inode *inode) 1440 struct inode *inode)
1438{ 1441{
1439 struct inode *dir = dentry->d_parent->d_inode; 1442 struct inode *dir = dentry->d_parent->d_inode;
1440 unsigned long offset;
1441 struct buffer_head *bh; 1443 struct buffer_head *bh;
1442 struct ext4_dir_entry_2 *de; 1444 struct ext4_dir_entry_2 *de;
1443 struct super_block *sb; 1445 struct super_block *sb;
@@ -1459,7 +1461,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1459 ext4_mark_inode_dirty(handle, dir); 1461 ext4_mark_inode_dirty(handle, dir);
1460 } 1462 }
1461 blocks = dir->i_size >> sb->s_blocksize_bits; 1463 blocks = dir->i_size >> sb->s_blocksize_bits;
1462 for (block = 0, offset = 0; block < blocks; block++) { 1464 for (block = 0; block < blocks; block++) {
1463 bh = ext4_bread(handle, dir, block, 0, &retval); 1465 bh = ext4_bread(handle, dir, block, 0, &retval);
1464 if(!bh) 1466 if(!bh)
1465 return retval; 1467 return retval;
@@ -1574,7 +1576,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1574 dxtrace(dx_show_index("node", frames[1].entries)); 1576 dxtrace(dx_show_index("node", frames[1].entries));
1575 dxtrace(dx_show_index("node", 1577 dxtrace(dx_show_index("node",
1576 ((struct dx_node *) bh2->b_data)->entries)); 1578 ((struct dx_node *) bh2->b_data)->entries));
1577 err = ext4_journal_dirty_metadata(handle, bh2); 1579 err = ext4_handle_dirty_metadata(handle, inode, bh2);
1578 if (err) 1580 if (err)
1579 goto journal_error; 1581 goto journal_error;
1580 brelse (bh2); 1582 brelse (bh2);
@@ -1600,7 +1602,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1600 if (err) 1602 if (err)
1601 goto journal_error; 1603 goto journal_error;
1602 } 1604 }
1603 ext4_journal_dirty_metadata(handle, frames[0].bh); 1605 ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
1604 } 1606 }
1605 de = do_split(handle, dir, &bh, frame, &hinfo, &err); 1607 de = do_split(handle, dir, &bh, frame, &hinfo, &err);
1606 if (!de) 1608 if (!de)
@@ -1646,8 +1648,8 @@ static int ext4_delete_entry(handle_t *handle,
1646 else 1648 else
1647 de->inode = 0; 1649 de->inode = 0;
1648 dir->i_version++; 1650 dir->i_version++;
1649 BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); 1651 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1650 ext4_journal_dirty_metadata(handle, bh); 1652 ext4_handle_dirty_metadata(handle, dir, bh);
1651 return 0; 1653 return 0;
1652 } 1654 }
1653 i += ext4_rec_len_from_disk(de->rec_len); 1655 i += ext4_rec_len_from_disk(de->rec_len);
@@ -1693,9 +1695,11 @@ static int ext4_add_nondir(handle_t *handle,
1693 if (!err) { 1695 if (!err) {
1694 ext4_mark_inode_dirty(handle, inode); 1696 ext4_mark_inode_dirty(handle, inode);
1695 d_instantiate(dentry, inode); 1697 d_instantiate(dentry, inode);
1698 unlock_new_inode(inode);
1696 return 0; 1699 return 0;
1697 } 1700 }
1698 drop_nlink(inode); 1701 drop_nlink(inode);
1702 unlock_new_inode(inode);
1699 iput(inode); 1703 iput(inode);
1700 return err; 1704 return err;
1701} 1705}
@@ -1723,7 +1727,7 @@ retry:
1723 return PTR_ERR(handle); 1727 return PTR_ERR(handle);
1724 1728
1725 if (IS_DIRSYNC(dir)) 1729 if (IS_DIRSYNC(dir))
1726 handle->h_sync = 1; 1730 ext4_handle_sync(handle);
1727 1731
1728 inode = ext4_new_inode (handle, dir, mode); 1732 inode = ext4_new_inode (handle, dir, mode);
1729 err = PTR_ERR(inode); 1733 err = PTR_ERR(inode);
@@ -1757,7 +1761,7 @@ retry:
1757 return PTR_ERR(handle); 1761 return PTR_ERR(handle);
1758 1762
1759 if (IS_DIRSYNC(dir)) 1763 if (IS_DIRSYNC(dir))
1760 handle->h_sync = 1; 1764 ext4_handle_sync(handle);
1761 1765
1762 inode = ext4_new_inode(handle, dir, mode); 1766 inode = ext4_new_inode(handle, dir, mode);
1763 err = PTR_ERR(inode); 1767 err = PTR_ERR(inode);
@@ -1793,7 +1797,7 @@ retry:
1793 return PTR_ERR(handle); 1797 return PTR_ERR(handle);
1794 1798
1795 if (IS_DIRSYNC(dir)) 1799 if (IS_DIRSYNC(dir))
1796 handle->h_sync = 1; 1800 ext4_handle_sync(handle);
1797 1801
1798 inode = ext4_new_inode(handle, dir, S_IFDIR | mode); 1802 inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
1799 err = PTR_ERR(inode); 1803 err = PTR_ERR(inode);
@@ -1822,14 +1826,15 @@ retry:
1822 strcpy(de->name, ".."); 1826 strcpy(de->name, "..");
1823 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1827 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1824 inode->i_nlink = 2; 1828 inode->i_nlink = 2;
1825 BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); 1829 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
1826 ext4_journal_dirty_metadata(handle, dir_block); 1830 ext4_handle_dirty_metadata(handle, dir, dir_block);
1827 brelse(dir_block); 1831 brelse(dir_block);
1828 ext4_mark_inode_dirty(handle, inode); 1832 ext4_mark_inode_dirty(handle, inode);
1829 err = ext4_add_entry(handle, dentry, inode); 1833 err = ext4_add_entry(handle, dentry, inode);
1830 if (err) { 1834 if (err) {
1831out_clear_inode: 1835out_clear_inode:
1832 clear_nlink(inode); 1836 clear_nlink(inode);
1837 unlock_new_inode(inode);
1833 ext4_mark_inode_dirty(handle, inode); 1838 ext4_mark_inode_dirty(handle, inode);
1834 iput(inode); 1839 iput(inode);
1835 goto out_stop; 1840 goto out_stop;
@@ -1838,6 +1843,7 @@ out_clear_inode:
1838 ext4_update_dx_flag(dir); 1843 ext4_update_dx_flag(dir);
1839 ext4_mark_inode_dirty(handle, dir); 1844 ext4_mark_inode_dirty(handle, dir);
1840 d_instantiate(dentry, inode); 1845 d_instantiate(dentry, inode);
1846 unlock_new_inode(inode);
1841out_stop: 1847out_stop:
1842 ext4_journal_stop(handle); 1848 ext4_journal_stop(handle);
1843 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 1849 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
@@ -1850,7 +1856,7 @@ out_stop:
1850 */ 1856 */
1851static int empty_dir(struct inode *inode) 1857static int empty_dir(struct inode *inode)
1852{ 1858{
1853 unsigned long offset; 1859 unsigned int offset;
1854 struct buffer_head *bh; 1860 struct buffer_head *bh;
1855 struct ext4_dir_entry_2 *de, *de1; 1861 struct ext4_dir_entry_2 *de, *de1;
1856 struct super_block *sb; 1862 struct super_block *sb;
@@ -1895,7 +1901,7 @@ static int empty_dir(struct inode *inode)
1895 if (err) 1901 if (err)
1896 ext4_error(sb, __func__, 1902 ext4_error(sb, __func__,
1897 "error %d reading directory" 1903 "error %d reading directory"
1898 " #%lu offset %lu", 1904 " #%lu offset %u",
1899 err, inode->i_ino, offset); 1905 err, inode->i_ino, offset);
1900 offset += sb->s_blocksize; 1906 offset += sb->s_blocksize;
1901 continue; 1907 continue;
@@ -1933,6 +1939,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
1933 struct ext4_iloc iloc; 1939 struct ext4_iloc iloc;
1934 int err = 0, rc; 1940 int err = 0, rc;
1935 1941
1942 if (!ext4_handle_valid(handle))
1943 return 0;
1944
1936 lock_super(sb); 1945 lock_super(sb);
1937 if (!list_empty(&EXT4_I(inode)->i_orphan)) 1946 if (!list_empty(&EXT4_I(inode)->i_orphan))
1938 goto out_unlock; 1947 goto out_unlock;
@@ -1961,7 +1970,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
1961 /* Insert this inode at the head of the on-disk orphan list... */ 1970 /* Insert this inode at the head of the on-disk orphan list... */
1962 NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); 1971 NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
1963 EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); 1972 EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
1964 err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); 1973 err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh);
1965 rc = ext4_mark_iloc_dirty(handle, inode, &iloc); 1974 rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
1966 if (!err) 1975 if (!err)
1967 err = rc; 1976 err = rc;
@@ -1995,10 +2004,13 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
1995 struct list_head *prev; 2004 struct list_head *prev;
1996 struct ext4_inode_info *ei = EXT4_I(inode); 2005 struct ext4_inode_info *ei = EXT4_I(inode);
1997 struct ext4_sb_info *sbi; 2006 struct ext4_sb_info *sbi;
1998 unsigned long ino_next; 2007 __u32 ino_next;
1999 struct ext4_iloc iloc; 2008 struct ext4_iloc iloc;
2000 int err = 0; 2009 int err = 0;
2001 2010
2011 if (!ext4_handle_valid(handle))
2012 return 0;
2013
2002 lock_super(inode->i_sb); 2014 lock_super(inode->i_sb);
2003 if (list_empty(&ei->i_orphan)) { 2015 if (list_empty(&ei->i_orphan)) {
2004 unlock_super(inode->i_sb); 2016 unlock_super(inode->i_sb);
@@ -2017,7 +2029,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2017 * transaction handle with which to update the orphan list on 2029 * transaction handle with which to update the orphan list on
2018 * disk, but we still need to remove the inode from the linked 2030 * disk, but we still need to remove the inode from the linked
2019 * list in memory. */ 2031 * list in memory. */
2020 if (!handle) 2032 if (sbi->s_journal && !handle)
2021 goto out; 2033 goto out;
2022 2034
2023 err = ext4_reserve_inode_write(handle, inode, &iloc); 2035 err = ext4_reserve_inode_write(handle, inode, &iloc);
@@ -2025,19 +2037,19 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2025 goto out_err; 2037 goto out_err;
2026 2038
2027 if (prev == &sbi->s_orphan) { 2039 if (prev == &sbi->s_orphan) {
2028 jbd_debug(4, "superblock will point to %lu\n", ino_next); 2040 jbd_debug(4, "superblock will point to %u\n", ino_next);
2029 BUFFER_TRACE(sbi->s_sbh, "get_write_access"); 2041 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
2030 err = ext4_journal_get_write_access(handle, sbi->s_sbh); 2042 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
2031 if (err) 2043 if (err)
2032 goto out_brelse; 2044 goto out_brelse;
2033 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); 2045 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
2034 err = ext4_journal_dirty_metadata(handle, sbi->s_sbh); 2046 err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh);
2035 } else { 2047 } else {
2036 struct ext4_iloc iloc2; 2048 struct ext4_iloc iloc2;
2037 struct inode *i_prev = 2049 struct inode *i_prev =
2038 &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode; 2050 &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
2039 2051
2040 jbd_debug(4, "orphan inode %lu will point to %lu\n", 2052 jbd_debug(4, "orphan inode %lu will point to %u\n",
2041 i_prev->i_ino, ino_next); 2053 i_prev->i_ino, ino_next);
2042 err = ext4_reserve_inode_write(handle, i_prev, &iloc2); 2054 err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
2043 if (err) 2055 if (err)
@@ -2082,7 +2094,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2082 goto end_rmdir; 2094 goto end_rmdir;
2083 2095
2084 if (IS_DIRSYNC(dir)) 2096 if (IS_DIRSYNC(dir))
2085 handle->h_sync = 1; 2097 ext4_handle_sync(handle);
2086 2098
2087 inode = dentry->d_inode; 2099 inode = dentry->d_inode;
2088 2100
@@ -2136,7 +2148,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2136 return PTR_ERR(handle); 2148 return PTR_ERR(handle);
2137 2149
2138 if (IS_DIRSYNC(dir)) 2150 if (IS_DIRSYNC(dir))
2139 handle->h_sync = 1; 2151 ext4_handle_sync(handle);
2140 2152
2141 retval = -ENOENT; 2153 retval = -ENOENT;
2142 bh = ext4_find_entry(dir, &dentry->d_name, &de); 2154 bh = ext4_find_entry(dir, &dentry->d_name, &de);
@@ -2193,7 +2205,7 @@ retry:
2193 return PTR_ERR(handle); 2205 return PTR_ERR(handle);
2194 2206
2195 if (IS_DIRSYNC(dir)) 2207 if (IS_DIRSYNC(dir))
2196 handle->h_sync = 1; 2208 ext4_handle_sync(handle);
2197 2209
2198 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); 2210 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
2199 err = PTR_ERR(inode); 2211 err = PTR_ERR(inode);
@@ -2208,10 +2220,10 @@ retry:
2208 * We have a transaction open. All is sweetness. It also sets 2220 * We have a transaction open. All is sweetness. It also sets
2209 * i_size in generic_commit_write(). 2221 * i_size in generic_commit_write().
2210 */ 2222 */
2211 err = __page_symlink(inode, symname, l, 2223 err = __page_symlink(inode, symname, l, 1);
2212 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
2213 if (err) { 2224 if (err) {
2214 clear_nlink(inode); 2225 clear_nlink(inode);
2226 unlock_new_inode(inode);
2215 ext4_mark_inode_dirty(handle, inode); 2227 ext4_mark_inode_dirty(handle, inode);
2216 iput(inode); 2228 iput(inode);
2217 goto out_stop; 2229 goto out_stop;
@@ -2256,13 +2268,20 @@ retry:
2256 return PTR_ERR(handle); 2268 return PTR_ERR(handle);
2257 2269
2258 if (IS_DIRSYNC(dir)) 2270 if (IS_DIRSYNC(dir))
2259 handle->h_sync = 1; 2271 ext4_handle_sync(handle);
2260 2272
2261 inode->i_ctime = ext4_current_time(inode); 2273 inode->i_ctime = ext4_current_time(inode);
2262 ext4_inc_count(handle, inode); 2274 ext4_inc_count(handle, inode);
2263 atomic_inc(&inode->i_count); 2275 atomic_inc(&inode->i_count);
2264 2276
2265 err = ext4_add_nondir(handle, dentry, inode); 2277 err = ext4_add_entry(handle, dentry, inode);
2278 if (!err) {
2279 ext4_mark_inode_dirty(handle, inode);
2280 d_instantiate(dentry, inode);
2281 } else {
2282 drop_nlink(inode);
2283 iput(inode);
2284 }
2266 ext4_journal_stop(handle); 2285 ext4_journal_stop(handle);
2267 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2286 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2268 goto retry; 2287 goto retry;
@@ -2298,7 +2317,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2298 return PTR_ERR(handle); 2317 return PTR_ERR(handle);
2299 2318
2300 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) 2319 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
2301 handle->h_sync = 1; 2320 ext4_handle_sync(handle);
2302 2321
2303 old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de); 2322 old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
2304 /* 2323 /*
@@ -2352,8 +2371,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2352 new_dir->i_ctime = new_dir->i_mtime = 2371 new_dir->i_ctime = new_dir->i_mtime =
2353 ext4_current_time(new_dir); 2372 ext4_current_time(new_dir);
2354 ext4_mark_inode_dirty(handle, new_dir); 2373 ext4_mark_inode_dirty(handle, new_dir);
2355 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); 2374 BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
2356 ext4_journal_dirty_metadata(handle, new_bh); 2375 ext4_handle_dirty_metadata(handle, new_dir, new_bh);
2357 brelse(new_bh); 2376 brelse(new_bh);
2358 new_bh = NULL; 2377 new_bh = NULL;
2359 } 2378 }
@@ -2403,8 +2422,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2403 BUFFER_TRACE(dir_bh, "get_write_access"); 2422 BUFFER_TRACE(dir_bh, "get_write_access");
2404 ext4_journal_get_write_access(handle, dir_bh); 2423 ext4_journal_get_write_access(handle, dir_bh);
2405 PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); 2424 PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
2406 BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata"); 2425 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
2407 ext4_journal_dirty_metadata(handle, dir_bh); 2426 ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
2408 ext4_dec_count(handle, old_dir); 2427 ext4_dec_count(handle, old_dir);
2409 if (new_inode) { 2428 if (new_inode) {
2410 /* checked empty_dir above, can't have another parent, 2429 /* checked empty_dir above, can't have another parent,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b6ec1843a015..c328be5d6885 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -50,7 +50,7 @@ static int verify_group_input(struct super_block *sb,
50 ext4_get_group_no_and_offset(sb, start, NULL, &offset); 50 ext4_get_group_no_and_offset(sb, start, NULL, &offset);
51 if (group != sbi->s_groups_count) 51 if (group != sbi->s_groups_count)
52 ext4_warning(sb, __func__, 52 ext4_warning(sb, __func__,
53 "Cannot add at group %u (only %lu groups)", 53 "Cannot add at group %u (only %u groups)",
54 input->group, sbi->s_groups_count); 54 input->group, sbi->s_groups_count);
55 else if (offset != 0) 55 else if (offset != 0)
56 ext4_warning(sb, __func__, "Last group not full"); 56 ext4_warning(sb, __func__, "Last group not full");
@@ -149,7 +149,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
149{ 149{
150 int err; 150 int err;
151 151
152 if (handle->h_buffer_credits >= thresh) 152 if (ext4_handle_has_enough_credits(handle, thresh))
153 return 0; 153 return 0;
154 154
155 err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA); 155 err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
@@ -232,7 +232,7 @@ static int setup_new_group_blocks(struct super_block *sb,
232 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); 232 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
233 set_buffer_uptodate(gdb); 233 set_buffer_uptodate(gdb);
234 unlock_buffer(gdb); 234 unlock_buffer(gdb);
235 ext4_journal_dirty_metadata(handle, gdb); 235 ext4_handle_dirty_metadata(handle, NULL, gdb);
236 ext4_set_bit(bit, bh->b_data); 236 ext4_set_bit(bit, bh->b_data);
237 brelse(gdb); 237 brelse(gdb);
238 } 238 }
@@ -251,7 +251,7 @@ static int setup_new_group_blocks(struct super_block *sb,
251 err = PTR_ERR(bh); 251 err = PTR_ERR(bh);
252 goto exit_bh; 252 goto exit_bh;
253 } 253 }
254 ext4_journal_dirty_metadata(handle, gdb); 254 ext4_handle_dirty_metadata(handle, NULL, gdb);
255 ext4_set_bit(bit, bh->b_data); 255 ext4_set_bit(bit, bh->b_data);
256 brelse(gdb); 256 brelse(gdb);
257 } 257 }
@@ -276,7 +276,7 @@ static int setup_new_group_blocks(struct super_block *sb,
276 err = PTR_ERR(it); 276 err = PTR_ERR(it);
277 goto exit_bh; 277 goto exit_bh;
278 } 278 }
279 ext4_journal_dirty_metadata(handle, it); 279 ext4_handle_dirty_metadata(handle, NULL, it);
280 brelse(it); 280 brelse(it);
281 ext4_set_bit(bit, bh->b_data); 281 ext4_set_bit(bit, bh->b_data);
282 } 282 }
@@ -284,11 +284,9 @@ static int setup_new_group_blocks(struct super_block *sb,
284 if ((err = extend_or_restart_transaction(handle, 2, bh))) 284 if ((err = extend_or_restart_transaction(handle, 2, bh)))
285 goto exit_bh; 285 goto exit_bh;
286 286
287 mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), 287 mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
288 bh->b_data); 288 ext4_handle_dirty_metadata(handle, NULL, bh);
289 ext4_journal_dirty_metadata(handle, bh);
290 brelse(bh); 289 brelse(bh);
291
292 /* Mark unused entries in inode bitmap used */ 290 /* Mark unused entries in inode bitmap used */
293 ext4_debug("clear inode bitmap %#04llx (+%llu)\n", 291 ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
294 input->inode_bitmap, input->inode_bitmap - start); 292 input->inode_bitmap, input->inode_bitmap - start);
@@ -297,9 +295,9 @@ static int setup_new_group_blocks(struct super_block *sb,
297 goto exit_journal; 295 goto exit_journal;
298 } 296 }
299 297
300 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), 298 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
301 bh->b_data); 299 bh->b_data);
302 ext4_journal_dirty_metadata(handle, bh); 300 ext4_handle_dirty_metadata(handle, NULL, bh);
303exit_bh: 301exit_bh:
304 brelse(bh); 302 brelse(bh);
305 303
@@ -486,12 +484,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
486 * reserved inode, and will become GDT blocks (primary and backup). 484 * reserved inode, and will become GDT blocks (primary and backup).
487 */ 485 */
488 data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; 486 data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
489 ext4_journal_dirty_metadata(handle, dind); 487 ext4_handle_dirty_metadata(handle, NULL, dind);
490 brelse(dind); 488 brelse(dind);
491 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; 489 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
492 ext4_mark_iloc_dirty(handle, inode, &iloc); 490 ext4_mark_iloc_dirty(handle, inode, &iloc);
493 memset((*primary)->b_data, 0, sb->s_blocksize); 491 memset((*primary)->b_data, 0, sb->s_blocksize);
494 ext4_journal_dirty_metadata(handle, *primary); 492 ext4_handle_dirty_metadata(handle, NULL, *primary);
495 493
496 o_group_desc = EXT4_SB(sb)->s_group_desc; 494 o_group_desc = EXT4_SB(sb)->s_group_desc;
497 memcpy(n_group_desc, o_group_desc, 495 memcpy(n_group_desc, o_group_desc,
@@ -502,7 +500,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
502 kfree(o_group_desc); 500 kfree(o_group_desc);
503 501
504 le16_add_cpu(&es->s_reserved_gdt_blocks, -1); 502 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
505 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); 503 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
506 504
507 return 0; 505 return 0;
508 506
@@ -618,7 +616,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
618 primary[i]->b_blocknr, gdbackups, 616 primary[i]->b_blocknr, gdbackups,
619 blk + primary[i]->b_blocknr); */ 617 blk + primary[i]->b_blocknr); */
620 data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); 618 data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
621 err2 = ext4_journal_dirty_metadata(handle, primary[i]); 619 err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]);
622 if (!err) 620 if (!err)
623 err = err2; 621 err = err2;
624 } 622 }
@@ -676,7 +674,8 @@ static void update_backups(struct super_block *sb,
676 struct buffer_head *bh; 674 struct buffer_head *bh;
677 675
678 /* Out of journal space, and can't get more - abort - so sad */ 676 /* Out of journal space, and can't get more - abort - so sad */
679 if (handle->h_buffer_credits == 0 && 677 if (ext4_handle_valid(handle) &&
678 handle->h_buffer_credits == 0 &&
680 ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) && 679 ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
681 (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) 680 (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
682 break; 681 break;
@@ -696,7 +695,7 @@ static void update_backups(struct super_block *sb,
696 memset(bh->b_data + size, 0, rest); 695 memset(bh->b_data + size, 0, rest);
697 set_buffer_uptodate(bh); 696 set_buffer_uptodate(bh);
698 unlock_buffer(bh); 697 unlock_buffer(bh);
699 ext4_journal_dirty_metadata(handle, bh); 698 ext4_handle_dirty_metadata(handle, NULL, bh);
700 brelse(bh); 699 brelse(bh);
701 } 700 }
702 if ((err2 = ext4_journal_stop(handle)) && !err) 701 if ((err2 = ext4_journal_stop(handle)) && !err)
@@ -715,7 +714,7 @@ static void update_backups(struct super_block *sb,
715exit_err: 714exit_err:
716 if (err) { 715 if (err) {
717 ext4_warning(sb, __func__, 716 ext4_warning(sb, __func__,
718 "can't update backup for group %lu (err %d), " 717 "can't update backup for group %u (err %d), "
719 "forcing fsck on next reboot", group, err); 718 "forcing fsck on next reboot", group, err);
720 sbi->s_mount_state &= ~EXT4_VALID_FS; 719 sbi->s_mount_state &= ~EXT4_VALID_FS;
721 sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 720 sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
@@ -747,6 +746,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
747 struct inode *inode = NULL; 746 struct inode *inode = NULL;
748 handle_t *handle; 747 handle_t *handle;
749 int gdb_off, gdb_num; 748 int gdb_off, gdb_num;
749 int num_grp_locked = 0;
750 int err, err2; 750 int err, err2;
751 751
752 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); 752 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
@@ -761,13 +761,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
761 761
762 if (ext4_blocks_count(es) + input->blocks_count < 762 if (ext4_blocks_count(es) + input->blocks_count <
763 ext4_blocks_count(es)) { 763 ext4_blocks_count(es)) {
764 ext4_warning(sb, __func__, "blocks_count overflow\n"); 764 ext4_warning(sb, __func__, "blocks_count overflow");
765 return -EINVAL; 765 return -EINVAL;
766 } 766 }
767 767
768 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < 768 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
769 le32_to_cpu(es->s_inodes_count)) { 769 le32_to_cpu(es->s_inodes_count)) {
770 ext4_warning(sb, __func__, "inodes_count overflow\n"); 770 ext4_warning(sb, __func__, "inodes_count overflow");
771 return -EINVAL; 771 return -EINVAL;
772 } 772 }
773 773
@@ -787,6 +787,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
787 } 787 }
788 } 788 }
789 789
790
790 if ((err = verify_group_input(sb, input))) 791 if ((err = verify_group_input(sb, input)))
791 goto exit_put; 792 goto exit_put;
792 793
@@ -855,6 +856,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
855 * using the new disk blocks. 856 * using the new disk blocks.
856 */ 857 */
857 858
859 num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group);
858 /* Update group descriptor block for new group */ 860 /* Update group descriptor block for new group */
859 gdp = (struct ext4_group_desc *)((char *)primary->b_data + 861 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
860 gdb_off * EXT4_DESC_SIZE(sb)); 862 gdb_off * EXT4_DESC_SIZE(sb));
@@ -862,17 +864,20 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
862 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ 864 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
863 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ 865 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
864 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ 866 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
865 gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); 867 ext4_free_blks_set(sb, gdp, input->free_blocks_count);
866 gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); 868 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
869 gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
867 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); 870 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
868 871
869 /* 872 /*
870 * We can allocate memory for mb_alloc based on the new group 873 * We can allocate memory for mb_alloc based on the new group
871 * descriptor 874 * descriptor
872 */ 875 */
873 err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); 876 err = ext4_mb_add_groupinfo(sb, input->group, gdp);
874 if (err) 877 if (err) {
878 ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
875 goto exit_journal; 879 goto exit_journal;
880 }
876 881
877 /* 882 /*
878 * Make the new blocks and inodes valid next. We do this before 883 * Make the new blocks and inodes valid next. We do this before
@@ -914,8 +919,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
914 919
915 /* Update the global fs size fields */ 920 /* Update the global fs size fields */
916 sbi->s_groups_count++; 921 sbi->s_groups_count++;
922 ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
917 923
918 ext4_journal_dirty_metadata(handle, primary); 924 ext4_handle_dirty_metadata(handle, NULL, primary);
919 925
920 /* Update the reserved block counts only once the new group is 926 /* Update the reserved block counts only once the new group is
921 * active. */ 927 * active. */
@@ -937,7 +943,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
937 EXT4_INODES_PER_GROUP(sb); 943 EXT4_INODES_PER_GROUP(sb);
938 } 944 }
939 945
940 ext4_journal_dirty_metadata(handle, sbi->s_sbh); 946 ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
941 sb->s_dirt = 1; 947 sb->s_dirt = 1;
942 948
943exit_journal: 949exit_journal:
@@ -975,9 +981,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
975 struct buffer_head *bh; 981 struct buffer_head *bh;
976 handle_t *handle; 982 handle_t *handle;
977 int err; 983 int err;
978 unsigned long freed_blocks;
979 ext4_group_t group; 984 ext4_group_t group;
980 struct ext4_group_info *grp;
981 985
982 /* We don't need to worry about locking wrt other resizers just 986 /* We don't need to worry about locking wrt other resizers just
983 * yet: we're going to revalidate es->s_blocks_count after 987 * yet: we're going to revalidate es->s_blocks_count after
@@ -997,8 +1001,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
997 " too large to resize to %llu blocks safely\n", 1001 " too large to resize to %llu blocks safely\n",
998 sb->s_id, n_blocks_count); 1002 sb->s_id, n_blocks_count);
999 if (sizeof(sector_t) < 8) 1003 if (sizeof(sector_t) < 8)
1000 ext4_warning(sb, __func__, 1004 ext4_warning(sb, __func__, "CONFIG_LBD not enabled");
1001 "CONFIG_LBD not enabled\n");
1002 return -EINVAL; 1005 return -EINVAL;
1003 } 1006 }
1004 1007
@@ -1071,62 +1074,18 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1071 goto exit_put; 1074 goto exit_put;
1072 } 1075 }
1073 ext4_blocks_count_set(es, o_blocks_count + add); 1076 ext4_blocks_count_set(es, o_blocks_count + add);
1074 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); 1077 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
1075 sb->s_dirt = 1; 1078 sb->s_dirt = 1;
1076 unlock_super(sb); 1079 unlock_super(sb);
1077 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1080 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1078 o_blocks_count + add); 1081 o_blocks_count + add);
1079 ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); 1082 /* We add the blocks to the bitmap and set the group need init bit */
1083 ext4_add_groupblocks(handle, sb, o_blocks_count, add);
1080 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, 1084 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
1081 o_blocks_count + add); 1085 o_blocks_count + add);
1082 if ((err = ext4_journal_stop(handle))) 1086 if ((err = ext4_journal_stop(handle)))
1083 goto exit_put; 1087 goto exit_put;
1084 1088
1085 /*
1086 * Mark mballoc pages as not up to date so that they will be updated
1087 * next time they are loaded by ext4_mb_load_buddy.
1088 *
1089 * XXX Bad, Bad, BAD!!! We should not be overloading the
1090 * Uptodate flag, particularly on thte bitmap bh, as way of
1091 * hinting to ext4_mb_load_buddy() that it needs to be
1092 * overloaded. A user could take a LVM snapshot, then do an
1093 * on-line fsck, and clear the uptodate flag, and this would
1094 * not be a bug in userspace, but a bug in the kernel. FIXME!!!
1095 */
1096 {
1097 struct ext4_sb_info *sbi = EXT4_SB(sb);
1098 struct inode *inode = sbi->s_buddy_cache;
1099 int blocks_per_page;
1100 int block;
1101 int pnum;
1102 struct page *page;
1103
1104 /* Set buddy page as not up to date */
1105 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1106 block = group * 2;
1107 pnum = block / blocks_per_page;
1108 page = find_get_page(inode->i_mapping, pnum);
1109 if (page != NULL) {
1110 ClearPageUptodate(page);
1111 page_cache_release(page);
1112 }
1113
1114 /* Set bitmap page as not up to date */
1115 block++;
1116 pnum = block / blocks_per_page;
1117 page = find_get_page(inode->i_mapping, pnum);
1118 if (page != NULL) {
1119 ClearPageUptodate(page);
1120 page_cache_release(page);
1121 }
1122
1123 /* Get the info on the last group */
1124 grp = ext4_get_group_info(sb, group);
1125
1126 /* Update free blocks in group info */
1127 ext4_mb_update_group_info(grp, add);
1128 }
1129
1130 if (test_opt(sb, DEBUG)) 1089 if (test_opt(sb, DEBUG))
1131 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", 1090 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
1132 ext4_blocks_count(es)); 1091 ext4_blocks_count(es));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e4a241c65dbe..e5f06a5f045e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -51,9 +51,7 @@ struct proc_dir_entry *ext4_proc_root;
51 51
52static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 52static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
53 unsigned long journal_devnum); 53 unsigned long journal_devnum);
54static int ext4_create_journal(struct super_block *, struct ext4_super_block *, 54static int ext4_commit_super(struct super_block *sb,
55 unsigned int);
56static void ext4_commit_super(struct super_block *sb,
57 struct ext4_super_block *es, int sync); 55 struct ext4_super_block *es, int sync);
58static void ext4_mark_recovery_complete(struct super_block *sb, 56static void ext4_mark_recovery_complete(struct super_block *sb,
59 struct ext4_super_block *es); 57 struct ext4_super_block *es);
@@ -64,9 +62,9 @@ static const char *ext4_decode_error(struct super_block *sb, int errno,
64 char nbuf[16]); 62 char nbuf[16]);
65static int ext4_remount(struct super_block *sb, int *flags, char *data); 63static int ext4_remount(struct super_block *sb, int *flags, char *data);
66static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 64static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
67static void ext4_unlockfs(struct super_block *sb); 65static int ext4_unfreeze(struct super_block *sb);
68static void ext4_write_super(struct super_block *sb); 66static void ext4_write_super(struct super_block *sb);
69static void ext4_write_super_lockfs(struct super_block *sb); 67static int ext4_freeze(struct super_block *sb);
70 68
71 69
72ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 70ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
@@ -93,6 +91,38 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
93 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 91 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
94} 92}
95 93
94__u32 ext4_free_blks_count(struct super_block *sb,
95 struct ext4_group_desc *bg)
96{
97 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
98 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
99 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
100}
101
102__u32 ext4_free_inodes_count(struct super_block *sb,
103 struct ext4_group_desc *bg)
104{
105 return le16_to_cpu(bg->bg_free_inodes_count_lo) |
106 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
107 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
108}
109
110__u32 ext4_used_dirs_count(struct super_block *sb,
111 struct ext4_group_desc *bg)
112{
113 return le16_to_cpu(bg->bg_used_dirs_count_lo) |
114 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
115 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
116}
117
118__u32 ext4_itable_unused_count(struct super_block *sb,
119 struct ext4_group_desc *bg)
120{
121 return le16_to_cpu(bg->bg_itable_unused_lo) |
122 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
123 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
124}
125
96void ext4_block_bitmap_set(struct super_block *sb, 126void ext4_block_bitmap_set(struct super_block *sb,
97 struct ext4_group_desc *bg, ext4_fsblk_t blk) 127 struct ext4_group_desc *bg, ext4_fsblk_t blk)
98{ 128{
@@ -117,6 +147,38 @@ void ext4_inode_table_set(struct super_block *sb,
117 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 147 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
118} 148}
119 149
150void ext4_free_blks_set(struct super_block *sb,
151 struct ext4_group_desc *bg, __u32 count)
152{
153 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
154 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
155 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
156}
157
158void ext4_free_inodes_set(struct super_block *sb,
159 struct ext4_group_desc *bg, __u32 count)
160{
161 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
162 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
163 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
164}
165
166void ext4_used_dirs_set(struct super_block *sb,
167 struct ext4_group_desc *bg, __u32 count)
168{
169 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
170 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
171 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
172}
173
174void ext4_itable_unused_set(struct super_block *sb,
175 struct ext4_group_desc *bg, __u32 count)
176{
177 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
178 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
179 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
180}
181
120/* 182/*
121 * Wrappers for jbd2_journal_start/end. 183 * Wrappers for jbd2_journal_start/end.
122 * 184 *
@@ -136,13 +198,19 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
136 * backs (eg. EIO in the commit thread), then we still need to 198 * backs (eg. EIO in the commit thread), then we still need to
137 * take the FS itself readonly cleanly. */ 199 * take the FS itself readonly cleanly. */
138 journal = EXT4_SB(sb)->s_journal; 200 journal = EXT4_SB(sb)->s_journal;
139 if (is_journal_aborted(journal)) { 201 if (journal) {
140 ext4_abort(sb, __func__, 202 if (is_journal_aborted(journal)) {
141 "Detected aborted journal"); 203 ext4_abort(sb, __func__,
142 return ERR_PTR(-EROFS); 204 "Detected aborted journal");
205 return ERR_PTR(-EROFS);
206 }
207 return jbd2_journal_start(journal, nblocks);
143 } 208 }
144 209 /*
145 return jbd2_journal_start(journal, nblocks); 210 * We're not journaling, return the appropriate indication.
211 */
212 current->journal_info = EXT4_NOJOURNAL_HANDLE;
213 return current->journal_info;
146} 214}
147 215
148/* 216/*
@@ -157,6 +225,14 @@ int __ext4_journal_stop(const char *where, handle_t *handle)
157 int err; 225 int err;
158 int rc; 226 int rc;
159 227
228 if (!ext4_handle_valid(handle)) {
229 /*
230 * Do this here since we don't call jbd2_journal_stop() in
231 * no-journal mode.
232 */
233 current->journal_info = NULL;
234 return 0;
235 }
160 sb = handle->h_transaction->t_journal->j_private; 236 sb = handle->h_transaction->t_journal->j_private;
161 err = handle->h_err; 237 err = handle->h_err;
162 rc = jbd2_journal_stop(handle); 238 rc = jbd2_journal_stop(handle);
@@ -174,6 +250,8 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
174 char nbuf[16]; 250 char nbuf[16];
175 const char *errstr = ext4_decode_error(NULL, err, nbuf); 251 const char *errstr = ext4_decode_error(NULL, err, nbuf);
176 252
253 BUG_ON(!ext4_handle_valid(handle));
254
177 if (bh) 255 if (bh)
178 BUFFER_TRACE(bh, "abort"); 256 BUFFER_TRACE(bh, "abort");
179 257
@@ -350,6 +428,44 @@ void ext4_warning(struct super_block *sb, const char *function,
350 va_end(args); 428 va_end(args);
351} 429}
352 430
431void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
432 const char *function, const char *fmt, ...)
433__releases(bitlock)
434__acquires(bitlock)
435{
436 va_list args;
437 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
438
439 va_start(args, fmt);
440 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
441 vprintk(fmt, args);
442 printk("\n");
443 va_end(args);
444
445 if (test_opt(sb, ERRORS_CONT)) {
446 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
447 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
448 ext4_commit_super(sb, es, 0);
449 return;
450 }
451 ext4_unlock_group(sb, grp);
452 ext4_handle_error(sb);
453 /*
454 * We only get here in the ERRORS_RO case; relocking the group
455 * may be dangerous, but nothing bad will happen since the
456 * filesystem will have already been marked read/only and the
457 * journal has been aborted. We return 1 as a hint to callers
458 * who might want to use the return value from
459 * ext4_grp_locked_error() to distinguish between the
460 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
461 * aggressively from the ext4 function in question, with a
462 * more appropriate error code.
463 */
464 ext4_lock_group(sb, grp);
465 return;
466}
467
468
353void ext4_update_dynamic_rev(struct super_block *sb) 469void ext4_update_dynamic_rev(struct super_block *sb)
354{ 470{
355 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 471 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -389,7 +505,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev)
389 return bdev; 505 return bdev;
390 506
391fail: 507fail:
392 printk(KERN_ERR "EXT4: failed to open journal device %s: %ld\n", 508 printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
393 __bdevname(dev, b), PTR_ERR(bdev)); 509 __bdevname(dev, b), PTR_ERR(bdev));
394 return NULL; 510 return NULL;
395} 511}
@@ -448,11 +564,13 @@ static void ext4_put_super(struct super_block *sb)
448 ext4_mb_release(sb); 564 ext4_mb_release(sb);
449 ext4_ext_release(sb); 565 ext4_ext_release(sb);
450 ext4_xattr_put_super(sb); 566 ext4_xattr_put_super(sb);
451 err = jbd2_journal_destroy(sbi->s_journal); 567 if (sbi->s_journal) {
452 sbi->s_journal = NULL; 568 err = jbd2_journal_destroy(sbi->s_journal);
453 if (err < 0) 569 sbi->s_journal = NULL;
454 ext4_abort(sb, __func__, "Couldn't clean up the journal"); 570 if (err < 0)
455 571 ext4_abort(sb, __func__,
572 "Couldn't clean up the journal");
573 }
456 if (!(sb->s_flags & MS_RDONLY)) { 574 if (!(sb->s_flags & MS_RDONLY)) {
457 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 575 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
458 es->s_state = cpu_to_le16(sbi->s_mount_state); 576 es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -522,6 +640,11 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
522 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 640 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
523 INIT_LIST_HEAD(&ei->i_prealloc_list); 641 INIT_LIST_HEAD(&ei->i_prealloc_list);
524 spin_lock_init(&ei->i_prealloc_lock); 642 spin_lock_init(&ei->i_prealloc_lock);
643 /*
644 * Note: We can be called before EXT4_SB(sb)->s_journal is set,
645 * therefore it can be null here. Don't check it, just initialize
646 * jinode.
647 */
525 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); 648 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
526 ei->i_reserved_data_blocks = 0; 649 ei->i_reserved_data_blocks = 0;
527 ei->i_reserved_meta_blocks = 0; 650 ei->i_reserved_meta_blocks = 0;
@@ -588,7 +711,8 @@ static void ext4_clear_inode(struct inode *inode)
588 } 711 }
589#endif 712#endif
590 ext4_discard_preallocations(inode); 713 ext4_discard_preallocations(inode);
591 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 714 if (EXT4_JOURNAL(inode))
715 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
592 &EXT4_I(inode)->jinode); 716 &EXT4_I(inode)->jinode);
593} 717}
594 718
@@ -681,10 +805,19 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
681#endif 805#endif
682 if (!test_opt(sb, RESERVATION)) 806 if (!test_opt(sb, RESERVATION))
683 seq_puts(seq, ",noreservation"); 807 seq_puts(seq, ",noreservation");
684 if (sbi->s_commit_interval) { 808 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
685 seq_printf(seq, ",commit=%u", 809 seq_printf(seq, ",commit=%u",
686 (unsigned) (sbi->s_commit_interval / HZ)); 810 (unsigned) (sbi->s_commit_interval / HZ));
687 } 811 }
812 if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
813 seq_printf(seq, ",min_batch_time=%u",
814 (unsigned) sbi->s_min_batch_time);
815 }
816 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
817 seq_printf(seq, ",max_batch_time=%u",
818 (unsigned) sbi->s_min_batch_time);
819 }
820
688 /* 821 /*
689 * We're changing the default of barrier mount option, so 822 * We're changing the default of barrier mount option, so
690 * let's always display its mount state so it's clear what its 823 * let's always display its mount state so it's clear what its
@@ -696,8 +829,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
696 seq_puts(seq, ",journal_async_commit"); 829 seq_puts(seq, ",journal_async_commit");
697 if (test_opt(sb, NOBH)) 830 if (test_opt(sb, NOBH))
698 seq_puts(seq, ",nobh"); 831 seq_puts(seq, ",nobh");
699 if (!test_opt(sb, EXTENTS))
700 seq_puts(seq, ",noextents");
701 if (test_opt(sb, I_VERSION)) 832 if (test_opt(sb, I_VERSION))
702 seq_puts(seq, ",i_version"); 833 seq_puts(seq, ",i_version");
703 if (!test_opt(sb, DELALLOC)) 834 if (!test_opt(sb, DELALLOC))
@@ -772,6 +903,25 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
772 ext4_nfs_get_inode); 903 ext4_nfs_get_inode);
773} 904}
774 905
906/*
907 * Try to release metadata pages (indirect blocks, directories) which are
908 * mapped via the block device. Since these pages could have journal heads
909 * which would prevent try_to_free_buffers() from freeing them, we must use
910 * jbd2 layer's try_to_free_buffers() function to release them.
911 */
912static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
913{
914 journal_t *journal = EXT4_SB(sb)->s_journal;
915
916 WARN_ON(PageChecked(page));
917 if (!page_has_buffers(page))
918 return 0;
919 if (journal)
920 return jbd2_journal_try_to_free_buffers(journal, page,
921 wait & ~__GFP_WAIT);
922 return try_to_free_buffers(page);
923}
924
775#ifdef CONFIG_QUOTA 925#ifdef CONFIG_QUOTA
776#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") 926#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
777#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 927#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
@@ -803,7 +953,9 @@ static struct dquot_operations ext4_quota_operations = {
803 .acquire_dquot = ext4_acquire_dquot, 953 .acquire_dquot = ext4_acquire_dquot,
804 .release_dquot = ext4_release_dquot, 954 .release_dquot = ext4_release_dquot,
805 .mark_dirty = ext4_mark_dquot_dirty, 955 .mark_dirty = ext4_mark_dquot_dirty,
806 .write_info = ext4_write_info 956 .write_info = ext4_write_info,
957 .alloc_dquot = dquot_alloc,
958 .destroy_dquot = dquot_destroy,
807}; 959};
808 960
809static struct quotactl_ops ext4_qctl_operations = { 961static struct quotactl_ops ext4_qctl_operations = {
@@ -826,8 +978,8 @@ static const struct super_operations ext4_sops = {
826 .put_super = ext4_put_super, 978 .put_super = ext4_put_super,
827 .write_super = ext4_write_super, 979 .write_super = ext4_write_super,
828 .sync_fs = ext4_sync_fs, 980 .sync_fs = ext4_sync_fs,
829 .write_super_lockfs = ext4_write_super_lockfs, 981 .freeze_fs = ext4_freeze,
830 .unlockfs = ext4_unlockfs, 982 .unfreeze_fs = ext4_unfreeze,
831 .statfs = ext4_statfs, 983 .statfs = ext4_statfs,
832 .remount_fs = ext4_remount, 984 .remount_fs = ext4_remount,
833 .clear_inode = ext4_clear_inode, 985 .clear_inode = ext4_clear_inode,
@@ -836,6 +988,7 @@ static const struct super_operations ext4_sops = {
836 .quota_read = ext4_quota_read, 988 .quota_read = ext4_quota_read,
837 .quota_write = ext4_quota_write, 989 .quota_write = ext4_quota_write,
838#endif 990#endif
991 .bdev_try_to_free_page = bdev_try_to_free_page,
839}; 992};
840 993
841static const struct export_operations ext4_export_ops = { 994static const struct export_operations ext4_export_ops = {
@@ -850,16 +1003,17 @@ enum {
850 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1003 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
851 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1004 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
852 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 1005 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
853 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 1006 Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
1007 Opt_journal_update, Opt_journal_dev,
854 Opt_journal_checksum, Opt_journal_async_commit, 1008 Opt_journal_checksum, Opt_journal_async_commit,
855 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1009 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
856 Opt_data_err_abort, Opt_data_err_ignore, 1010 Opt_data_err_abort, Opt_data_err_ignore,
857 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1011 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
858 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1012 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
859 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 1013 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
860 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, 1014 Opt_grpquota, Opt_i_version,
861 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1015 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
862 Opt_inode_readahead_blks 1016 Opt_inode_readahead_blks, Opt_journal_ioprio
863}; 1017};
864 1018
865static const match_table_t tokens = { 1019static const match_table_t tokens = {
@@ -889,8 +1043,9 @@ static const match_table_t tokens = {
889 {Opt_nobh, "nobh"}, 1043 {Opt_nobh, "nobh"},
890 {Opt_bh, "bh"}, 1044 {Opt_bh, "bh"},
891 {Opt_commit, "commit=%u"}, 1045 {Opt_commit, "commit=%u"},
1046 {Opt_min_batch_time, "min_batch_time=%u"},
1047 {Opt_max_batch_time, "max_batch_time=%u"},
892 {Opt_journal_update, "journal=update"}, 1048 {Opt_journal_update, "journal=update"},
893 {Opt_journal_inum, "journal=%u"},
894 {Opt_journal_dev, "journal_dev=%u"}, 1049 {Opt_journal_dev, "journal_dev=%u"},
895 {Opt_journal_checksum, "journal_checksum"}, 1050 {Opt_journal_checksum, "journal_checksum"},
896 {Opt_journal_async_commit, "journal_async_commit"}, 1051 {Opt_journal_async_commit, "journal_async_commit"},
@@ -911,14 +1066,13 @@ static const match_table_t tokens = {
911 {Opt_quota, "quota"}, 1066 {Opt_quota, "quota"},
912 {Opt_usrquota, "usrquota"}, 1067 {Opt_usrquota, "usrquota"},
913 {Opt_barrier, "barrier=%u"}, 1068 {Opt_barrier, "barrier=%u"},
914 {Opt_extents, "extents"},
915 {Opt_noextents, "noextents"},
916 {Opt_i_version, "i_version"}, 1069 {Opt_i_version, "i_version"},
917 {Opt_stripe, "stripe=%u"}, 1070 {Opt_stripe, "stripe=%u"},
918 {Opt_resize, "resize"}, 1071 {Opt_resize, "resize"},
919 {Opt_delalloc, "delalloc"}, 1072 {Opt_delalloc, "delalloc"},
920 {Opt_nodelalloc, "nodelalloc"}, 1073 {Opt_nodelalloc, "nodelalloc"},
921 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1074 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1075 {Opt_journal_ioprio, "journal_ioprio=%u"},
922 {Opt_err, NULL}, 1076 {Opt_err, NULL},
923}; 1077};
924 1078
@@ -943,8 +1097,11 @@ static ext4_fsblk_t get_sb_block(void **data)
943 return sb_block; 1097 return sb_block;
944} 1098}
945 1099
1100#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1101
946static int parse_options(char *options, struct super_block *sb, 1102static int parse_options(char *options, struct super_block *sb,
947 unsigned int *inum, unsigned long *journal_devnum, 1103 unsigned long *journal_devnum,
1104 unsigned int *journal_ioprio,
948 ext4_fsblk_t *n_blocks_count, int is_remount) 1105 ext4_fsblk_t *n_blocks_count, int is_remount)
949{ 1106{
950 struct ext4_sb_info *sbi = EXT4_SB(sb); 1107 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -956,7 +1113,6 @@ static int parse_options(char *options, struct super_block *sb,
956 int qtype, qfmt; 1113 int qtype, qfmt;
957 char *qname; 1114 char *qname;
958#endif 1115#endif
959 ext4_fsblk_t last_block;
960 1116
961 if (!options) 1117 if (!options)
962 return 1; 1118 return 1;
@@ -1068,16 +1224,6 @@ static int parse_options(char *options, struct super_block *sb,
1068 } 1224 }
1069 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1225 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1070 break; 1226 break;
1071 case Opt_journal_inum:
1072 if (is_remount) {
1073 printk(KERN_ERR "EXT4-fs: cannot specify "
1074 "journal on remount\n");
1075 return 0;
1076 }
1077 if (match_int(&args[0], &option))
1078 return 0;
1079 *inum = option;
1080 break;
1081 case Opt_journal_dev: 1227 case Opt_journal_dev:
1082 if (is_remount) { 1228 if (is_remount) {
1083 printk(KERN_ERR "EXT4-fs: cannot specify " 1229 printk(KERN_ERR "EXT4-fs: cannot specify "
@@ -1107,6 +1253,22 @@ static int parse_options(char *options, struct super_block *sb,
1107 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1253 option = JBD2_DEFAULT_MAX_COMMIT_AGE;
1108 sbi->s_commit_interval = HZ * option; 1254 sbi->s_commit_interval = HZ * option;
1109 break; 1255 break;
1256 case Opt_max_batch_time:
1257 if (match_int(&args[0], &option))
1258 return 0;
1259 if (option < 0)
1260 return 0;
1261 if (option == 0)
1262 option = EXT4_DEF_MAX_BATCH_TIME;
1263 sbi->s_max_batch_time = option;
1264 break;
1265 case Opt_min_batch_time:
1266 if (match_int(&args[0], &option))
1267 return 0;
1268 if (option < 0)
1269 return 0;
1270 sbi->s_min_batch_time = option;
1271 break;
1110 case Opt_data_journal: 1272 case Opt_data_journal:
1111 data_opt = EXT4_MOUNT_JOURNAL_DATA; 1273 data_opt = EXT4_MOUNT_JOURNAL_DATA;
1112 goto datacheck; 1274 goto datacheck;
@@ -1142,8 +1304,7 @@ static int parse_options(char *options, struct super_block *sb,
1142 case Opt_grpjquota: 1304 case Opt_grpjquota:
1143 qtype = GRPQUOTA; 1305 qtype = GRPQUOTA;
1144set_qf_name: 1306set_qf_name:
1145 if ((sb_any_quota_enabled(sb) || 1307 if (sb_any_quota_loaded(sb) &&
1146 sb_any_quota_suspended(sb)) &&
1147 !sbi->s_qf_names[qtype]) { 1308 !sbi->s_qf_names[qtype]) {
1148 printk(KERN_ERR 1309 printk(KERN_ERR
1149 "EXT4-fs: Cannot change journaled " 1310 "EXT4-fs: Cannot change journaled "
@@ -1182,8 +1343,7 @@ set_qf_name:
1182 case Opt_offgrpjquota: 1343 case Opt_offgrpjquota:
1183 qtype = GRPQUOTA; 1344 qtype = GRPQUOTA;
1184clear_qf_name: 1345clear_qf_name:
1185 if ((sb_any_quota_enabled(sb) || 1346 if (sb_any_quota_loaded(sb) &&
1186 sb_any_quota_suspended(sb)) &&
1187 sbi->s_qf_names[qtype]) { 1347 sbi->s_qf_names[qtype]) {
1188 printk(KERN_ERR "EXT4-fs: Cannot change " 1348 printk(KERN_ERR "EXT4-fs: Cannot change "
1189 "journaled quota options when " 1349 "journaled quota options when "
@@ -1202,8 +1362,7 @@ clear_qf_name:
1202 case Opt_jqfmt_vfsv0: 1362 case Opt_jqfmt_vfsv0:
1203 qfmt = QFMT_VFS_V0; 1363 qfmt = QFMT_VFS_V0;
1204set_qf_format: 1364set_qf_format:
1205 if ((sb_any_quota_enabled(sb) || 1365 if (sb_any_quota_loaded(sb) &&
1206 sb_any_quota_suspended(sb)) &&
1207 sbi->s_jquota_fmt != qfmt) { 1366 sbi->s_jquota_fmt != qfmt) {
1208 printk(KERN_ERR "EXT4-fs: Cannot change " 1367 printk(KERN_ERR "EXT4-fs: Cannot change "
1209 "journaled quota options when " 1368 "journaled quota options when "
@@ -1222,7 +1381,7 @@ set_qf_format:
1222 set_opt(sbi->s_mount_opt, GRPQUOTA); 1381 set_opt(sbi->s_mount_opt, GRPQUOTA);
1223 break; 1382 break;
1224 case Opt_noquota: 1383 case Opt_noquota:
1225 if (sb_any_quota_enabled(sb)) { 1384 if (sb_any_quota_loaded(sb)) {
1226 printk(KERN_ERR "EXT4-fs: Cannot change quota " 1385 printk(KERN_ERR "EXT4-fs: Cannot change quota "
1227 "options when quota turned on.\n"); 1386 "options when quota turned on.\n");
1228 return 0; 1387 return 0;
@@ -1280,33 +1439,6 @@ set_qf_format:
1280 case Opt_bh: 1439 case Opt_bh:
1281 clear_opt(sbi->s_mount_opt, NOBH); 1440 clear_opt(sbi->s_mount_opt, NOBH);
1282 break; 1441 break;
1283 case Opt_extents:
1284 if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
1285 EXT4_FEATURE_INCOMPAT_EXTENTS)) {
1286 ext4_warning(sb, __func__,
1287 "extents feature not enabled "
1288 "on this filesystem, use tune2fs\n");
1289 return 0;
1290 }
1291 set_opt(sbi->s_mount_opt, EXTENTS);
1292 break;
1293 case Opt_noextents:
1294 /*
1295 * When e2fsprogs support resizing an already existing
1296 * ext3 file system to greater than 2**32 we need to
1297 * add support to block allocator to handle growing
1298 * already existing block mapped inode so that blocks
1299 * allocated for them fall within 2**32
1300 */
1301 last_block = ext4_blocks_count(sbi->s_es) - 1;
1302 if (last_block > 0xffffffffULL) {
1303 printk(KERN_ERR "EXT4-fs: Filesystem too "
1304 "large to mount with "
1305 "-o noextents options\n");
1306 return 0;
1307 }
1308 clear_opt(sbi->s_mount_opt, EXTENTS);
1309 break;
1310 case Opt_i_version: 1442 case Opt_i_version:
1311 set_opt(sbi->s_mount_opt, I_VERSION); 1443 set_opt(sbi->s_mount_opt, I_VERSION);
1312 sb->s_flags |= MS_I_VERSION; 1444 sb->s_flags |= MS_I_VERSION;
@@ -1331,6 +1463,14 @@ set_qf_format:
1331 return 0; 1463 return 0;
1332 sbi->s_inode_readahead_blks = option; 1464 sbi->s_inode_readahead_blks = option;
1333 break; 1465 break;
1466 case Opt_journal_ioprio:
1467 if (match_int(&args[0], &option))
1468 return 0;
1469 if (option < 0 || option > 7)
1470 break;
1471 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
1472 option);
1473 break;
1334 default: 1474 default:
1335 printk(KERN_ERR 1475 printk(KERN_ERR
1336 "EXT4-fs: Unrecognized mount option \"%s\" " 1476 "EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1406,24 +1546,19 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1406 printk(KERN_WARNING 1546 printk(KERN_WARNING
1407 "EXT4-fs warning: checktime reached, " 1547 "EXT4-fs warning: checktime reached, "
1408 "running e2fsck is recommended\n"); 1548 "running e2fsck is recommended\n");
1409#if 0 1549 if (!sbi->s_journal)
1410 /* @@@ We _will_ want to clear the valid bit if we find 1550 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1411 * inconsistencies, to force a fsck at reboot. But for
1412 * a plain journaled filesystem we can keep it set as
1413 * valid forever! :)
1414 */
1415 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1416#endif
1417 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1551 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1418 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1552 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1419 le16_add_cpu(&es->s_mnt_count, 1); 1553 le16_add_cpu(&es->s_mnt_count, 1);
1420 es->s_mtime = cpu_to_le32(get_seconds()); 1554 es->s_mtime = cpu_to_le32(get_seconds());
1421 ext4_update_dynamic_rev(sb); 1555 ext4_update_dynamic_rev(sb);
1422 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1556 if (sbi->s_journal)
1557 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1423 1558
1424 ext4_commit_super(sb, es, 1); 1559 ext4_commit_super(sb, es, 1);
1425 if (test_opt(sb, DEBUG)) 1560 if (test_opt(sb, DEBUG))
1426 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, " 1561 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1427 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1562 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
1428 sb->s_blocksize, 1563 sb->s_blocksize,
1429 sbi->s_groups_count, 1564 sbi->s_groups_count,
@@ -1431,9 +1566,13 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1431 EXT4_INODES_PER_GROUP(sb), 1566 EXT4_INODES_PER_GROUP(sb),
1432 sbi->s_mount_opt); 1567 sbi->s_mount_opt);
1433 1568
1434 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", 1569 if (EXT4_SB(sb)->s_journal) {
1435 sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : 1570 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
1436 "external", EXT4_SB(sb)->s_journal->j_devname); 1571 sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1572 "external", EXT4_SB(sb)->s_journal->j_devname);
1573 } else {
1574 printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
1575 }
1437 return res; 1576 return res;
1438} 1577}
1439 1578
@@ -1445,7 +1584,6 @@ static int ext4_fill_flex_info(struct super_block *sb)
1445 ext4_group_t flex_group_count; 1584 ext4_group_t flex_group_count;
1446 ext4_group_t flex_group; 1585 ext4_group_t flex_group;
1447 int groups_per_flex = 0; 1586 int groups_per_flex = 0;
1448 __u64 block_bitmap = 0;
1449 int i; 1587 int i;
1450 1588
1451 if (!sbi->s_es->s_log_groups_per_flex) { 1589 if (!sbi->s_es->s_log_groups_per_flex) {
@@ -1464,21 +1602,18 @@ static int ext4_fill_flex_info(struct super_block *sb)
1464 sizeof(struct flex_groups), GFP_KERNEL); 1602 sizeof(struct flex_groups), GFP_KERNEL);
1465 if (sbi->s_flex_groups == NULL) { 1603 if (sbi->s_flex_groups == NULL) {
1466 printk(KERN_ERR "EXT4-fs: not enough memory for " 1604 printk(KERN_ERR "EXT4-fs: not enough memory for "
1467 "%lu flex groups\n", flex_group_count); 1605 "%u flex groups\n", flex_group_count);
1468 goto failed; 1606 goto failed;
1469 } 1607 }
1470 1608
1471 gdp = ext4_get_group_desc(sb, 1, &bh);
1472 block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
1473
1474 for (i = 0; i < sbi->s_groups_count; i++) { 1609 for (i = 0; i < sbi->s_groups_count; i++) {
1475 gdp = ext4_get_group_desc(sb, i, &bh); 1610 gdp = ext4_get_group_desc(sb, i, &bh);
1476 1611
1477 flex_group = ext4_flex_group(sbi, i); 1612 flex_group = ext4_flex_group(sbi, i);
1478 sbi->s_flex_groups[flex_group].free_inodes += 1613 sbi->s_flex_groups[flex_group].free_inodes +=
1479 le16_to_cpu(gdp->bg_free_inodes_count); 1614 ext4_free_inodes_count(sb, gdp);
1480 sbi->s_flex_groups[flex_group].free_blocks += 1615 sbi->s_flex_groups[flex_group].free_blocks +=
1481 le16_to_cpu(gdp->bg_free_blocks_count); 1616 ext4_free_blks_count(sb, gdp);
1482 } 1617 }
1483 1618
1484 return 1; 1619 return 1;
@@ -1552,14 +1687,14 @@ static int ext4_check_descriptors(struct super_block *sb)
1552 block_bitmap = ext4_block_bitmap(sb, gdp); 1687 block_bitmap = ext4_block_bitmap(sb, gdp);
1553 if (block_bitmap < first_block || block_bitmap > last_block) { 1688 if (block_bitmap < first_block || block_bitmap > last_block) {
1554 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1689 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1555 "Block bitmap for group %lu not in group " 1690 "Block bitmap for group %u not in group "
1556 "(block %llu)!\n", i, block_bitmap); 1691 "(block %llu)!\n", i, block_bitmap);
1557 return 0; 1692 return 0;
1558 } 1693 }
1559 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1694 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1560 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1695 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1561 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1696 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1562 "Inode bitmap for group %lu not in group " 1697 "Inode bitmap for group %u not in group "
1563 "(block %llu)!\n", i, inode_bitmap); 1698 "(block %llu)!\n", i, inode_bitmap);
1564 return 0; 1699 return 0;
1565 } 1700 }
@@ -1567,14 +1702,14 @@ static int ext4_check_descriptors(struct super_block *sb)
1567 if (inode_table < first_block || 1702 if (inode_table < first_block ||
1568 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1703 inode_table + sbi->s_itb_per_group - 1 > last_block) {
1569 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1704 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1570 "Inode table for group %lu not in group " 1705 "Inode table for group %u not in group "
1571 "(block %llu)!\n", i, inode_table); 1706 "(block %llu)!\n", i, inode_table);
1572 return 0; 1707 return 0;
1573 } 1708 }
1574 spin_lock(sb_bgl_lock(sbi, i)); 1709 spin_lock(sb_bgl_lock(sbi, i));
1575 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1710 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1576 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1711 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1577 "Checksum for group %lu failed (%u!=%u)\n", 1712 "Checksum for group %u failed (%u!=%u)\n",
1578 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1713 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1579 gdp)), le16_to_cpu(gdp->bg_checksum)); 1714 gdp)), le16_to_cpu(gdp->bg_checksum));
1580 if (!(sb->s_flags & MS_RDONLY)) { 1715 if (!(sb->s_flags & MS_RDONLY)) {
@@ -1721,7 +1856,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
1721 /* small i_blocks in vfs inode? */ 1856 /* small i_blocks in vfs inode? */
1722 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1857 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1723 /* 1858 /*
1724 * CONFIG_LSF is not enabled implies the inode 1859 * CONFIG_LBD is not enabled implies the inode
1725 * i_block represent total blocks in 512 bytes 1860 * i_block represent total blocks in 512 bytes
1726 * 32 == size of vfs inode i_blocks * 8 1861 * 32 == size of vfs inode i_blocks * 8
1727 */ 1862 */
@@ -1764,7 +1899,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1764 1899
1765 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1900 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1766 /* 1901 /*
1767 * !has_huge_files or CONFIG_LSF is not enabled 1902 * !has_huge_files or CONFIG_LBD is not enabled
1768 * implies the inode i_block represent total blocks in 1903 * implies the inode i_block represent total blocks in
1769 * 512 bytes 32 == size of vfs inode i_blocks * 8 1904 * 512 bytes 32 == size of vfs inode i_blocks * 8
1770 */ 1905 */
@@ -1866,19 +2001,20 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1866 ext4_fsblk_t sb_block = get_sb_block(&data); 2001 ext4_fsblk_t sb_block = get_sb_block(&data);
1867 ext4_fsblk_t logical_sb_block; 2002 ext4_fsblk_t logical_sb_block;
1868 unsigned long offset = 0; 2003 unsigned long offset = 0;
1869 unsigned int journal_inum = 0;
1870 unsigned long journal_devnum = 0; 2004 unsigned long journal_devnum = 0;
1871 unsigned long def_mount_opts; 2005 unsigned long def_mount_opts;
1872 struct inode *root; 2006 struct inode *root;
1873 char *cp; 2007 char *cp;
2008 const char *descr;
1874 int ret = -EINVAL; 2009 int ret = -EINVAL;
1875 int blocksize; 2010 int blocksize;
1876 int db_count; 2011 unsigned int db_count;
1877 int i; 2012 unsigned int i;
1878 int needs_recovery, has_huge_files; 2013 int needs_recovery, has_huge_files;
1879 __le32 features; 2014 int features;
1880 __u64 blocks_count; 2015 __u64 blocks_count;
1881 int err; 2016 int err;
2017 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
1882 2018
1883 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2019 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
1884 if (!sbi) 2020 if (!sbi)
@@ -1959,31 +2095,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1959 2095
1960 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 2096 sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
1961 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 2097 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
2098 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2099 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2100 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
1962 2101
1963 set_opt(sbi->s_mount_opt, RESERVATION); 2102 set_opt(sbi->s_mount_opt, RESERVATION);
1964 set_opt(sbi->s_mount_opt, BARRIER); 2103 set_opt(sbi->s_mount_opt, BARRIER);
1965 2104
1966 /* 2105 /*
1967 * turn on extents feature by default in ext4 filesystem
1968 * only if feature flag already set by mkfs or tune2fs.
1969 * Use -o noextents to turn it off
1970 */
1971 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
1972 set_opt(sbi->s_mount_opt, EXTENTS);
1973 else
1974 ext4_warning(sb, __func__,
1975 "extents feature not enabled on this filesystem, "
1976 "use tune2fs.\n");
1977
1978 /*
1979 * enable delayed allocation by default 2106 * enable delayed allocation by default
1980 * Use -o nodelalloc to turn it off 2107 * Use -o nodelalloc to turn it off
1981 */ 2108 */
1982 set_opt(sbi->s_mount_opt, DELALLOC); 2109 set_opt(sbi->s_mount_opt, DELALLOC);
1983 2110
1984 2111
1985 if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum, 2112 if (!parse_options((char *) data, sb, &journal_devnum,
1986 NULL, 0)) 2113 &journal_ioprio, NULL, 0))
1987 goto failed_mount; 2114 goto failed_mount;
1988 2115
1989 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2116 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -2005,15 +2132,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2005 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); 2132 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
2006 if (features) { 2133 if (features) {
2007 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " 2134 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
2008 "unsupported optional features (%x).\n", 2135 "unsupported optional features (%x).\n", sb->s_id,
2009 sb->s_id, le32_to_cpu(features)); 2136 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2137 ~EXT4_FEATURE_INCOMPAT_SUPP));
2010 goto failed_mount; 2138 goto failed_mount;
2011 } 2139 }
2012 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); 2140 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
2013 if (!(sb->s_flags & MS_RDONLY) && features) { 2141 if (!(sb->s_flags & MS_RDONLY) && features) {
2014 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " 2142 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
2015 "unsupported optional features (%x).\n", 2143 "unsupported optional features (%x).\n", sb->s_id,
2016 sb->s_id, le32_to_cpu(features)); 2144 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2145 ~EXT4_FEATURE_RO_COMPAT_SUPP));
2017 goto failed_mount; 2146 goto failed_mount;
2018 } 2147 }
2019 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2148 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
@@ -2021,13 +2150,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2021 if (has_huge_files) { 2150 if (has_huge_files) {
2022 /* 2151 /*
2023 * Large file size enabled file system can only be 2152 * Large file size enabled file system can only be
2024 * mount if kernel is build with CONFIG_LSF 2153 * mount if kernel is build with CONFIG_LBD
2025 */ 2154 */
2026 if (sizeof(root->i_blocks) < sizeof(u64) && 2155 if (sizeof(root->i_blocks) < sizeof(u64) &&
2027 !(sb->s_flags & MS_RDONLY)) { 2156 !(sb->s_flags & MS_RDONLY)) {
2028 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " 2157 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
2029 "files cannot be mounted read-write " 2158 "files cannot be mounted read-write "
2030 "without CONFIG_LSF.\n", sb->s_id); 2159 "without CONFIG_LBD.\n", sb->s_id);
2031 goto failed_mount; 2160 goto failed_mount;
2032 } 2161 }
2033 } 2162 }
@@ -2118,6 +2247,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2118 for (i = 0; i < 4; i++) 2247 for (i = 0; i < 4; i++)
2119 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2248 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2120 sbi->s_def_hash_version = es->s_def_hash_version; 2249 sbi->s_def_hash_version = es->s_def_hash_version;
2250 i = le32_to_cpu(es->s_flags);
2251 if (i & EXT2_FLAGS_UNSIGNED_HASH)
2252 sbi->s_hash_unsigned = 3;
2253 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
2254#ifdef __CHAR_UNSIGNED__
2255 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
2256 sbi->s_hash_unsigned = 3;
2257#else
2258 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
2259#endif
2260 sb->s_dirt = 1;
2261 }
2121 2262
2122 if (sbi->s_blocks_per_group > blocksize * 8) { 2263 if (sbi->s_blocks_per_group > blocksize * 8) {
2123 printk(KERN_ERR 2264 printk(KERN_ERR
@@ -2145,20 +2286,30 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2145 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2286 if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2146 goto cantfind_ext4; 2287 goto cantfind_ext4;
2147 2288
2148 /* ensure blocks_count calculation below doesn't sign-extend */ 2289 /*
2149 if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) < 2290 * It makes no sense for the first data block to be beyond the end
2150 le32_to_cpu(es->s_first_data_block) + 1) { 2291 * of the filesystem.
2151 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, " 2292 */
2152 "first data block %u, blocks per group %lu\n", 2293 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2153 ext4_blocks_count(es), 2294 printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
2154 le32_to_cpu(es->s_first_data_block), 2295 "block %u is beyond end of filesystem (%llu)\n",
2155 EXT4_BLOCKS_PER_GROUP(sb)); 2296 le32_to_cpu(es->s_first_data_block),
2297 ext4_blocks_count(es));
2156 goto failed_mount; 2298 goto failed_mount;
2157 } 2299 }
2158 blocks_count = (ext4_blocks_count(es) - 2300 blocks_count = (ext4_blocks_count(es) -
2159 le32_to_cpu(es->s_first_data_block) + 2301 le32_to_cpu(es->s_first_data_block) +
2160 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2302 EXT4_BLOCKS_PER_GROUP(sb) - 1);
2161 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2303 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2304 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2305 printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
2306 "(block count %llu, first data block %u, "
2307 "blocks per group %lu)\n", sbi->s_groups_count,
2308 ext4_blocks_count(es),
2309 le32_to_cpu(es->s_first_data_block),
2310 EXT4_BLOCKS_PER_GROUP(sb));
2311 goto failed_mount;
2312 }
2162 sbi->s_groups_count = blocks_count; 2313 sbi->s_groups_count = blocks_count;
2163 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2314 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2164 EXT4_DESC_PER_BLOCK(sb); 2315 EXT4_DESC_PER_BLOCK(sb);
@@ -2270,27 +2421,26 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2270 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2421 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2271 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2422 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2272 ext4_commit_super(sb, es, 1); 2423 ext4_commit_super(sb, es, 1);
2273 printk(KERN_CRIT
2274 "EXT4-fs (device %s): mount failed\n",
2275 sb->s_id);
2276 goto failed_mount4; 2424 goto failed_mount4;
2277 } 2425 }
2278 } 2426 }
2279 } else if (journal_inum) { 2427 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2280 if (ext4_create_journal(sb, es, journal_inum)) 2428 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2281 goto failed_mount3; 2429 printk(KERN_ERR "EXT4-fs: required journal recovery "
2430 "suppressed and not mounted read-only\n");
2431 goto failed_mount4;
2282 } else { 2432 } else {
2283 if (!silent) 2433 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
2284 printk(KERN_ERR 2434 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
2285 "ext4: No journal on filesystem on %s\n", 2435 sbi->s_journal = NULL;
2286 sb->s_id); 2436 needs_recovery = 0;
2287 goto failed_mount3; 2437 goto no_journal;
2288 } 2438 }
2289 2439
2290 if (ext4_blocks_count(es) > 0xffffffffULL && 2440 if (ext4_blocks_count(es) > 0xffffffffULL &&
2291 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2441 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2292 JBD2_FEATURE_INCOMPAT_64BIT)) { 2442 JBD2_FEATURE_INCOMPAT_64BIT)) {
2293 printk(KERN_ERR "ext4: Failed to set 64-bit journal feature\n"); 2443 printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
2294 goto failed_mount4; 2444 goto failed_mount4;
2295 } 2445 }
2296 2446
@@ -2335,6 +2485,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2335 default: 2485 default:
2336 break; 2486 break;
2337 } 2487 }
2488 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
2489
2490no_journal:
2338 2491
2339 if (test_opt(sb, NOBH)) { 2492 if (test_opt(sb, NOBH)) {
2340 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2493 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
@@ -2420,13 +2573,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2420 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 2573 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2421 ext4_orphan_cleanup(sb, es); 2574 ext4_orphan_cleanup(sb, es);
2422 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2575 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2423 if (needs_recovery) 2576 if (needs_recovery) {
2424 printk(KERN_INFO "EXT4-fs: recovery complete.\n"); 2577 printk(KERN_INFO "EXT4-fs: recovery complete.\n");
2425 ext4_mark_recovery_complete(sb, es); 2578 ext4_mark_recovery_complete(sb, es);
2426 printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", 2579 }
2427 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": 2580 if (EXT4_SB(sb)->s_journal) {
2428 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": 2581 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2429 "writeback"); 2582 descr = " journalled data mode";
2583 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2584 descr = " ordered data mode";
2585 else
2586 descr = " writeback data mode";
2587 } else
2588 descr = "out journal";
2589
2590 printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
2591 sb->s_id, descr);
2430 2592
2431 lock_kernel(); 2593 lock_kernel();
2432 return 0; 2594 return 0;
@@ -2438,8 +2600,11 @@ cantfind_ext4:
2438 goto failed_mount; 2600 goto failed_mount;
2439 2601
2440failed_mount4: 2602failed_mount4:
2441 jbd2_journal_destroy(sbi->s_journal); 2603 printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
2442 sbi->s_journal = NULL; 2604 if (sbi->s_journal) {
2605 jbd2_journal_destroy(sbi->s_journal);
2606 sbi->s_journal = NULL;
2607 }
2443failed_mount3: 2608failed_mount3:
2444 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2609 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2445 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2610 percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@ -2476,11 +2641,9 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
2476{ 2641{
2477 struct ext4_sb_info *sbi = EXT4_SB(sb); 2642 struct ext4_sb_info *sbi = EXT4_SB(sb);
2478 2643
2479 if (sbi->s_commit_interval) 2644 journal->j_commit_interval = sbi->s_commit_interval;
2480 journal->j_commit_interval = sbi->s_commit_interval; 2645 journal->j_min_batch_time = sbi->s_min_batch_time;
2481 /* We could also set up an ext4-specific default for the commit 2646 journal->j_max_batch_time = sbi->s_max_batch_time;
2482 * interval here, but for now we'll just fall back to the jbd
2483 * default. */
2484 2647
2485 spin_lock(&journal->j_state_lock); 2648 spin_lock(&journal->j_state_lock);
2486 if (test_opt(sb, BARRIER)) 2649 if (test_opt(sb, BARRIER))
@@ -2500,6 +2663,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
2500 struct inode *journal_inode; 2663 struct inode *journal_inode;
2501 journal_t *journal; 2664 journal_t *journal;
2502 2665
2666 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2667
2503 /* First, test for the existence of a valid inode on disk. Bad 2668 /* First, test for the existence of a valid inode on disk. Bad
2504 * things happen if we iget() an unused inode, as the subsequent 2669 * things happen if we iget() an unused inode, as the subsequent
2505 * iput() will try to delete it. */ 2670 * iput() will try to delete it. */
@@ -2548,13 +2713,15 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2548 struct ext4_super_block *es; 2713 struct ext4_super_block *es;
2549 struct block_device *bdev; 2714 struct block_device *bdev;
2550 2715
2716 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2717
2551 bdev = ext4_blkdev_get(j_dev); 2718 bdev = ext4_blkdev_get(j_dev);
2552 if (bdev == NULL) 2719 if (bdev == NULL)
2553 return NULL; 2720 return NULL;
2554 2721
2555 if (bd_claim(bdev, sb)) { 2722 if (bd_claim(bdev, sb)) {
2556 printk(KERN_ERR 2723 printk(KERN_ERR
2557 "EXT4: failed to claim external journal device.\n"); 2724 "EXT4-fs: failed to claim external journal device.\n");
2558 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 2725 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2559 return NULL; 2726 return NULL;
2560 } 2727 }
@@ -2635,6 +2802,8 @@ static int ext4_load_journal(struct super_block *sb,
2635 int err = 0; 2802 int err = 0;
2636 int really_read_only; 2803 int really_read_only;
2637 2804
2805 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2806
2638 if (journal_devnum && 2807 if (journal_devnum &&
2639 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2808 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2640 printk(KERN_INFO "EXT4-fs: external journal device major/minor " 2809 printk(KERN_INFO "EXT4-fs: external journal device major/minor "
@@ -2719,55 +2888,14 @@ static int ext4_load_journal(struct super_block *sb,
2719 return 0; 2888 return 0;
2720} 2889}
2721 2890
2722static int ext4_create_journal(struct super_block *sb, 2891static int ext4_commit_super(struct super_block *sb,
2723 struct ext4_super_block *es,
2724 unsigned int journal_inum)
2725{
2726 journal_t *journal;
2727 int err;
2728
2729 if (sb->s_flags & MS_RDONLY) {
2730 printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to "
2731 "create journal.\n");
2732 return -EROFS;
2733 }
2734
2735 journal = ext4_get_journal(sb, journal_inum);
2736 if (!journal)
2737 return -EINVAL;
2738
2739 printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n",
2740 journal_inum);
2741
2742 err = jbd2_journal_create(journal);
2743 if (err) {
2744 printk(KERN_ERR "EXT4-fs: error creating journal.\n");
2745 jbd2_journal_destroy(journal);
2746 return -EIO;
2747 }
2748
2749 EXT4_SB(sb)->s_journal = journal;
2750
2751 ext4_update_dynamic_rev(sb);
2752 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
2753 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL);
2754
2755 es->s_journal_inum = cpu_to_le32(journal_inum);
2756 sb->s_dirt = 1;
2757
2758 /* Make sure we flush the recovery flag to disk. */
2759 ext4_commit_super(sb, es, 1);
2760
2761 return 0;
2762}
2763
2764static void ext4_commit_super(struct super_block *sb,
2765 struct ext4_super_block *es, int sync) 2892 struct ext4_super_block *es, int sync)
2766{ 2893{
2767 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 2894 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
2895 int error = 0;
2768 2896
2769 if (!sbh) 2897 if (!sbh)
2770 return; 2898 return error;
2771 if (buffer_write_io_error(sbh)) { 2899 if (buffer_write_io_error(sbh)) {
2772 /* 2900 /*
2773 * Oh, dear. A previous attempt to write the 2901 * Oh, dear. A previous attempt to write the
@@ -2777,25 +2905,33 @@ static void ext4_commit_super(struct super_block *sb,
2777 * be remapped. Nothing we can do but to retry the 2905 * be remapped. Nothing we can do but to retry the
2778 * write and hope for the best. 2906 * write and hope for the best.
2779 */ 2907 */
2780 printk(KERN_ERR "ext4: previous I/O error to " 2908 printk(KERN_ERR "EXT4-fs: previous I/O error to "
2781 "superblock detected for %s.\n", sb->s_id); 2909 "superblock detected for %s.\n", sb->s_id);
2782 clear_buffer_write_io_error(sbh); 2910 clear_buffer_write_io_error(sbh);
2783 set_buffer_uptodate(sbh); 2911 set_buffer_uptodate(sbh);
2784 } 2912 }
2785 es->s_wtime = cpu_to_le32(get_seconds()); 2913 es->s_wtime = cpu_to_le32(get_seconds());
2786 ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); 2914 ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
2787 es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 2915 &EXT4_SB(sb)->s_freeblocks_counter));
2916 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
2917 &EXT4_SB(sb)->s_freeinodes_counter));
2918
2788 BUFFER_TRACE(sbh, "marking dirty"); 2919 BUFFER_TRACE(sbh, "marking dirty");
2789 mark_buffer_dirty(sbh); 2920 mark_buffer_dirty(sbh);
2790 if (sync) { 2921 if (sync) {
2791 sync_dirty_buffer(sbh); 2922 error = sync_dirty_buffer(sbh);
2792 if (buffer_write_io_error(sbh)) { 2923 if (error)
2793 printk(KERN_ERR "ext4: I/O error while writing " 2924 return error;
2925
2926 error = buffer_write_io_error(sbh);
2927 if (error) {
2928 printk(KERN_ERR "EXT4-fs: I/O error while writing "
2794 "superblock for %s.\n", sb->s_id); 2929 "superblock for %s.\n", sb->s_id);
2795 clear_buffer_write_io_error(sbh); 2930 clear_buffer_write_io_error(sbh);
2796 set_buffer_uptodate(sbh); 2931 set_buffer_uptodate(sbh);
2797 } 2932 }
2798 } 2933 }
2934 return error;
2799} 2935}
2800 2936
2801 2937
@@ -2809,6 +2945,10 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
2809{ 2945{
2810 journal_t *journal = EXT4_SB(sb)->s_journal; 2946 journal_t *journal = EXT4_SB(sb)->s_journal;
2811 2947
2948 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2949 BUG_ON(journal != NULL);
2950 return;
2951 }
2812 jbd2_journal_lock_updates(journal); 2952 jbd2_journal_lock_updates(journal);
2813 if (jbd2_journal_flush(journal) < 0) 2953 if (jbd2_journal_flush(journal) < 0)
2814 goto out; 2954 goto out;
@@ -2838,6 +2978,8 @@ static void ext4_clear_journal_err(struct super_block *sb,
2838 int j_errno; 2978 int j_errno;
2839 const char *errstr; 2979 const char *errstr;
2840 2980
2981 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2982
2841 journal = EXT4_SB(sb)->s_journal; 2983 journal = EXT4_SB(sb)->s_journal;
2842 2984
2843 /* 2985 /*
@@ -2870,14 +3012,17 @@ static void ext4_clear_journal_err(struct super_block *sb,
2870int ext4_force_commit(struct super_block *sb) 3012int ext4_force_commit(struct super_block *sb)
2871{ 3013{
2872 journal_t *journal; 3014 journal_t *journal;
2873 int ret; 3015 int ret = 0;
2874 3016
2875 if (sb->s_flags & MS_RDONLY) 3017 if (sb->s_flags & MS_RDONLY)
2876 return 0; 3018 return 0;
2877 3019
2878 journal = EXT4_SB(sb)->s_journal; 3020 journal = EXT4_SB(sb)->s_journal;
2879 sb->s_dirt = 0; 3021 if (journal) {
2880 ret = ext4_journal_force_commit(journal); 3022 sb->s_dirt = 0;
3023 ret = ext4_journal_force_commit(journal);
3024 }
3025
2881 return ret; 3026 return ret;
2882} 3027}
2883 3028
@@ -2889,9 +3034,13 @@ int ext4_force_commit(struct super_block *sb)
2889 */ 3034 */
2890static void ext4_write_super(struct super_block *sb) 3035static void ext4_write_super(struct super_block *sb)
2891{ 3036{
2892 if (mutex_trylock(&sb->s_lock) != 0) 3037 if (EXT4_SB(sb)->s_journal) {
2893 BUG(); 3038 if (mutex_trylock(&sb->s_lock) != 0)
2894 sb->s_dirt = 0; 3039 BUG();
3040 sb->s_dirt = 0;
3041 } else {
3042 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3043 }
2895} 3044}
2896 3045
2897static int ext4_sync_fs(struct super_block *sb, int wait) 3046static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -2900,10 +3049,14 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
2900 3049
2901 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3050 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
2902 sb->s_dirt = 0; 3051 sb->s_dirt = 0;
2903 if (wait) 3052 if (EXT4_SB(sb)->s_journal) {
2904 ret = ext4_force_commit(sb); 3053 if (wait)
2905 else 3054 ret = ext4_force_commit(sb);
2906 jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); 3055 else
3056 jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
3057 } else {
3058 ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
3059 }
2907 return ret; 3060 return ret;
2908} 3061}
2909 3062
@@ -2911,36 +3064,48 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
2911 * LVM calls this function before a (read-only) snapshot is created. This 3064 * LVM calls this function before a (read-only) snapshot is created. This
2912 * gives us a chance to flush the journal completely and mark the fs clean. 3065 * gives us a chance to flush the journal completely and mark the fs clean.
2913 */ 3066 */
2914static void ext4_write_super_lockfs(struct super_block *sb) 3067static int ext4_freeze(struct super_block *sb)
2915{ 3068{
3069 int error = 0;
3070 journal_t *journal;
2916 sb->s_dirt = 0; 3071 sb->s_dirt = 0;
2917 3072
2918 if (!(sb->s_flags & MS_RDONLY)) { 3073 if (!(sb->s_flags & MS_RDONLY)) {
2919 journal_t *journal = EXT4_SB(sb)->s_journal; 3074 journal = EXT4_SB(sb)->s_journal;
2920 3075
2921 /* Now we set up the journal barrier. */ 3076 if (journal) {
2922 jbd2_journal_lock_updates(journal); 3077 /* Now we set up the journal barrier. */
3078 jbd2_journal_lock_updates(journal);
2923 3079
2924 /* 3080 /*
2925 * We don't want to clear needs_recovery flag when we failed 3081 * We don't want to clear needs_recovery flag when we
2926 * to flush the journal. 3082 * failed to flush the journal.
2927 */ 3083 */
2928 if (jbd2_journal_flush(journal) < 0) 3084 error = jbd2_journal_flush(journal);
2929 return; 3085 if (error < 0)
3086 goto out;
3087 }
2930 3088
2931 /* Journal blocked and flushed, clear needs_recovery flag. */ 3089 /* Journal blocked and flushed, clear needs_recovery flag. */
2932 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3090 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
2933 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3091 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3092 error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3093 if (error)
3094 goto out;
2934 } 3095 }
3096 return 0;
3097out:
3098 jbd2_journal_unlock_updates(journal);
3099 return error;
2935} 3100}
2936 3101
2937/* 3102/*
2938 * Called by LVM after the snapshot is done. We need to reset the RECOVER 3103 * Called by LVM after the snapshot is done. We need to reset the RECOVER
2939 * flag here, even though the filesystem is not technically dirty yet. 3104 * flag here, even though the filesystem is not technically dirty yet.
2940 */ 3105 */
2941static void ext4_unlockfs(struct super_block *sb) 3106static int ext4_unfreeze(struct super_block *sb)
2942{ 3107{
2943 if (!(sb->s_flags & MS_RDONLY)) { 3108 if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
2944 lock_super(sb); 3109 lock_super(sb);
2945 /* Reser the needs_recovery flag before the fs is unlocked. */ 3110 /* Reser the needs_recovery flag before the fs is unlocked. */
2946 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3111 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -2948,6 +3113,7 @@ static void ext4_unlockfs(struct super_block *sb)
2948 unlock_super(sb); 3113 unlock_super(sb);
2949 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3114 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
2950 } 3115 }
3116 return 0;
2951} 3117}
2952 3118
2953static int ext4_remount(struct super_block *sb, int *flags, char *data) 3119static int ext4_remount(struct super_block *sb, int *flags, char *data)
@@ -2958,6 +3124,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
2958 unsigned long old_sb_flags; 3124 unsigned long old_sb_flags;
2959 struct ext4_mount_options old_opts; 3125 struct ext4_mount_options old_opts;
2960 ext4_group_t g; 3126 ext4_group_t g;
3127 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2961 int err; 3128 int err;
2962#ifdef CONFIG_QUOTA 3129#ifdef CONFIG_QUOTA
2963 int i; 3130 int i;
@@ -2969,16 +3136,21 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
2969 old_opts.s_resuid = sbi->s_resuid; 3136 old_opts.s_resuid = sbi->s_resuid;
2970 old_opts.s_resgid = sbi->s_resgid; 3137 old_opts.s_resgid = sbi->s_resgid;
2971 old_opts.s_commit_interval = sbi->s_commit_interval; 3138 old_opts.s_commit_interval = sbi->s_commit_interval;
3139 old_opts.s_min_batch_time = sbi->s_min_batch_time;
3140 old_opts.s_max_batch_time = sbi->s_max_batch_time;
2972#ifdef CONFIG_QUOTA 3141#ifdef CONFIG_QUOTA
2973 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 3142 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
2974 for (i = 0; i < MAXQUOTAS; i++) 3143 for (i = 0; i < MAXQUOTAS; i++)
2975 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 3144 old_opts.s_qf_names[i] = sbi->s_qf_names[i];
2976#endif 3145#endif
3146 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
3147 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
2977 3148
2978 /* 3149 /*
2979 * Allow the "check" option to be passed as a remount option. 3150 * Allow the "check" option to be passed as a remount option.
2980 */ 3151 */
2981 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { 3152 if (!parse_options(data, sb, NULL, &journal_ioprio,
3153 &n_blocks_count, 1)) {
2982 err = -EINVAL; 3154 err = -EINVAL;
2983 goto restore_opts; 3155 goto restore_opts;
2984 } 3156 }
@@ -2991,7 +3163,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
2991 3163
2992 es = sbi->s_es; 3164 es = sbi->s_es;
2993 3165
2994 ext4_init_journal_params(sb, sbi->s_journal); 3166 if (sbi->s_journal) {
3167 ext4_init_journal_params(sb, sbi->s_journal);
3168 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3169 }
2995 3170
2996 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3171 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
2997 n_blocks_count > ext4_blocks_count(es)) { 3172 n_blocks_count > ext4_blocks_count(es)) {
@@ -3020,17 +3195,20 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3020 * We have to unlock super so that we can wait for 3195 * We have to unlock super so that we can wait for
3021 * transactions. 3196 * transactions.
3022 */ 3197 */
3023 unlock_super(sb); 3198 if (sbi->s_journal) {
3024 ext4_mark_recovery_complete(sb, es); 3199 unlock_super(sb);
3025 lock_super(sb); 3200 ext4_mark_recovery_complete(sb, es);
3201 lock_super(sb);
3202 }
3026 } else { 3203 } else {
3027 __le32 ret; 3204 int ret;
3028 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3205 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3029 ~EXT4_FEATURE_RO_COMPAT_SUPP))) { 3206 ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
3030 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3207 printk(KERN_WARNING "EXT4-fs: %s: couldn't "
3031 "remount RDWR because of unsupported " 3208 "remount RDWR because of unsupported "
3032 "optional features (%x).\n", 3209 "optional features (%x).\n", sb->s_id,
3033 sb->s_id, le32_to_cpu(ret)); 3210 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
3211 ~EXT4_FEATURE_RO_COMPAT_SUPP));
3034 err = -EROFS; 3212 err = -EROFS;
3035 goto restore_opts; 3213 goto restore_opts;
3036 } 3214 }
@@ -3047,7 +3225,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3047 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3225 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3048 printk(KERN_ERR 3226 printk(KERN_ERR
3049 "EXT4-fs: ext4_remount: " 3227 "EXT4-fs: ext4_remount: "
3050 "Checksum for group %lu failed (%u!=%u)\n", 3228 "Checksum for group %u failed (%u!=%u)\n",
3051 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3229 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3052 le16_to_cpu(gdp->bg_checksum)); 3230 le16_to_cpu(gdp->bg_checksum));
3053 err = -EINVAL; 3231 err = -EINVAL;
@@ -3076,7 +3254,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3076 * been changed by e2fsck since we originally mounted 3254 * been changed by e2fsck since we originally mounted
3077 * the partition.) 3255 * the partition.)
3078 */ 3256 */
3079 ext4_clear_journal_err(sb, es); 3257 if (sbi->s_journal)
3258 ext4_clear_journal_err(sb, es);
3080 sbi->s_mount_state = le16_to_cpu(es->s_state); 3259 sbi->s_mount_state = le16_to_cpu(es->s_state);
3081 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3260 if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3082 goto restore_opts; 3261 goto restore_opts;
@@ -3084,6 +3263,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3084 sb->s_flags &= ~MS_RDONLY; 3263 sb->s_flags &= ~MS_RDONLY;
3085 } 3264 }
3086 } 3265 }
3266 if (sbi->s_journal == NULL)
3267 ext4_commit_super(sb, es, 1);
3268
3087#ifdef CONFIG_QUOTA 3269#ifdef CONFIG_QUOTA
3088 /* Release old quota file names */ 3270 /* Release old quota file names */
3089 for (i = 0; i < MAXQUOTAS; i++) 3271 for (i = 0; i < MAXQUOTAS; i++)
@@ -3098,6 +3280,8 @@ restore_opts:
3098 sbi->s_resuid = old_opts.s_resuid; 3280 sbi->s_resuid = old_opts.s_resuid;
3099 sbi->s_resgid = old_opts.s_resgid; 3281 sbi->s_resgid = old_opts.s_resgid;
3100 sbi->s_commit_interval = old_opts.s_commit_interval; 3282 sbi->s_commit_interval = old_opts.s_commit_interval;
3283 sbi->s_min_batch_time = old_opts.s_min_batch_time;
3284 sbi->s_max_batch_time = old_opts.s_max_batch_time;
3101#ifdef CONFIG_QUOTA 3285#ifdef CONFIG_QUOTA
3102 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 3286 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
3103 for (i = 0; i < MAXQUOTAS; i++) { 3287 for (i = 0; i < MAXQUOTAS; i++) {
@@ -3360,7 +3544,8 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3360 * When we journal data on quota file, we have to flush journal to see 3544 * When we journal data on quota file, we have to flush journal to see
3361 * all updates to the file when we bypass pagecache... 3545 * all updates to the file when we bypass pagecache...
3362 */ 3546 */
3363 if (ext4_should_journal_data(path.dentry->d_inode)) { 3547 if (EXT4_SB(sb)->s_journal &&
3548 ext4_should_journal_data(path.dentry->d_inode)) {
3364 /* 3549 /*
3365 * We don't need to lock updates but journal_flush() could 3550 * We don't need to lock updates but journal_flush() could
3366 * otherwise be livelocked... 3551 * otherwise be livelocked...
@@ -3434,7 +3619,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3434 struct buffer_head *bh; 3619 struct buffer_head *bh;
3435 handle_t *handle = journal_current_handle(); 3620 handle_t *handle = journal_current_handle();
3436 3621
3437 if (!handle) { 3622 if (EXT4_SB(sb)->s_journal && !handle) {
3438 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" 3623 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
3439 " cancelled because transaction is not started.\n", 3624 " cancelled because transaction is not started.\n",
3440 (unsigned long long)off, (unsigned long long)len); 3625 (unsigned long long)off, (unsigned long long)len);
@@ -3459,7 +3644,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3459 flush_dcache_page(bh->b_page); 3644 flush_dcache_page(bh->b_page);
3460 unlock_buffer(bh); 3645 unlock_buffer(bh);
3461 if (journal_quota) 3646 if (journal_quota)
3462 err = ext4_journal_dirty_metadata(handle, bh); 3647 err = ext4_handle_dirty_metadata(handle, NULL, bh);
3463 else { 3648 else {
3464 /* Always do at least ordered writes for quotas */ 3649 /* Always do at least ordered writes for quotas */
3465 err = ext4_jbd2_file_inode(handle, inode); 3650 err = ext4_jbd2_file_inode(handle, inode);
@@ -3513,18 +3698,15 @@ static int ext4_ui_proc_open(struct inode *inode, struct file *file)
3513static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, 3698static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
3514 size_t cnt, loff_t *ppos) 3699 size_t cnt, loff_t *ppos)
3515{ 3700{
3516 unsigned int *p = PDE(file->f_path.dentry->d_inode)->data; 3701 unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
3517 char str[32]; 3702 char str[32];
3518 unsigned long value;
3519 3703
3520 if (cnt >= sizeof(str)) 3704 if (cnt >= sizeof(str))
3521 return -EINVAL; 3705 return -EINVAL;
3522 if (copy_from_user(str, buf, cnt)) 3706 if (copy_from_user(str, buf, cnt))
3523 return -EFAULT; 3707 return -EFAULT;
3524 value = simple_strtol(str, NULL, 0); 3708
3525 if (value < 0) 3709 *p = simple_strtoul(str, NULL, 0);
3526 return -ERANGE;
3527 *p = value;
3528 return cnt; 3710 return cnt;
3529} 3711}
3530 3712
@@ -3615,7 +3797,7 @@ static void __exit exit_ext4_fs(void)
3615} 3797}
3616 3798
3617MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3799MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
3618MODULE_DESCRIPTION("Fourth Extended Filesystem with extents"); 3800MODULE_DESCRIPTION("Fourth Extended Filesystem");
3619MODULE_LICENSE("GPL"); 3801MODULE_LICENSE("GPL");
3620module_init(init_ext4_fs) 3802module_init(init_ext4_fs)
3621module_exit(exit_ext4_fs) 3803module_exit(exit_ext4_fs)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 80626d516fee..157ce6589c54 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -457,7 +457,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,
457 if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { 457 if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
458 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); 458 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
459 sb->s_dirt = 1; 459 sb->s_dirt = 1;
460 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); 460 ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
461 } 461 }
462} 462}
463 463
@@ -487,9 +487,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
488 } else { 488 } else {
489 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 489 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
490 error = ext4_journal_dirty_metadata(handle, bh); 490 error = ext4_handle_dirty_metadata(handle, inode, bh);
491 if (IS_SYNC(inode)) 491 if (IS_SYNC(inode))
492 handle->h_sync = 1; 492 ext4_handle_sync(handle);
493 DQUOT_FREE_BLOCK(inode, 1); 493 DQUOT_FREE_BLOCK(inode, 1);
494 ea_bdebug(bh, "refcount now=%d; releasing", 494 ea_bdebug(bh, "refcount now=%d; releasing",
495 le32_to_cpu(BHDR(bh)->h_refcount)); 495 le32_to_cpu(BHDR(bh)->h_refcount));
@@ -724,8 +724,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
724 if (error == -EIO) 724 if (error == -EIO)
725 goto bad_block; 725 goto bad_block;
726 if (!error) 726 if (!error)
727 error = ext4_journal_dirty_metadata(handle, 727 error = ext4_handle_dirty_metadata(handle,
728 bs->bh); 728 inode,
729 bs->bh);
729 if (error) 730 if (error)
730 goto cleanup; 731 goto cleanup;
731 goto inserted; 732 goto inserted;
@@ -794,8 +795,9 @@ inserted:
794 ea_bdebug(new_bh, "reusing; refcount now=%d", 795 ea_bdebug(new_bh, "reusing; refcount now=%d",
795 le32_to_cpu(BHDR(new_bh)->h_refcount)); 796 le32_to_cpu(BHDR(new_bh)->h_refcount));
796 unlock_buffer(new_bh); 797 unlock_buffer(new_bh);
797 error = ext4_journal_dirty_metadata(handle, 798 error = ext4_handle_dirty_metadata(handle,
798 new_bh); 799 inode,
800 new_bh);
799 if (error) 801 if (error)
800 goto cleanup_dquot; 802 goto cleanup_dquot;
801 } 803 }
@@ -810,8 +812,8 @@ inserted:
810 /* We need to allocate a new block */ 812 /* We need to allocate a new block */
811 ext4_fsblk_t goal = ext4_group_first_block_no(sb, 813 ext4_fsblk_t goal = ext4_group_first_block_no(sb,
812 EXT4_I(inode)->i_block_group); 814 EXT4_I(inode)->i_block_group);
813 ext4_fsblk_t block = ext4_new_meta_block(handle, inode, 815 ext4_fsblk_t block = ext4_new_meta_blocks(handle, inode,
814 goal, &error); 816 goal, NULL, &error);
815 if (error) 817 if (error)
816 goto cleanup; 818 goto cleanup;
817 ea_idebug(inode, "creating block %d", block); 819 ea_idebug(inode, "creating block %d", block);
@@ -833,7 +835,8 @@ getblk_failed:
833 set_buffer_uptodate(new_bh); 835 set_buffer_uptodate(new_bh);
834 unlock_buffer(new_bh); 836 unlock_buffer(new_bh);
835 ext4_xattr_cache_insert(new_bh); 837 ext4_xattr_cache_insert(new_bh);
836 error = ext4_journal_dirty_metadata(handle, new_bh); 838 error = ext4_handle_dirty_metadata(handle,
839 inode, new_bh);
837 if (error) 840 if (error)
838 goto cleanup; 841 goto cleanup;
839 } 842 }
@@ -1040,7 +1043,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1040 */ 1043 */
1041 is.iloc.bh = NULL; 1044 is.iloc.bh = NULL;
1042 if (IS_SYNC(inode)) 1045 if (IS_SYNC(inode))
1043 handle->h_sync = 1; 1046 ext4_handle_sync(handle);
1044 } 1047 }
1045 1048
1046cleanup: 1049cleanup: