aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Makefile10
-rw-r--r--fs/ext4/acl.h12
-rw-r--r--fs/ext4/balloc.c1457
-rw-r--r--fs/ext4/bitmap.c6
-rw-r--r--fs/ext4/dir.c64
-rw-r--r--fs/ext4/ext4.h133
-rw-r--r--fs/ext4/ext4_extents.h15
-rw-r--r--fs/ext4/ext4_i.h39
-rw-r--r--fs/ext4/ext4_sb.h25
-rw-r--r--fs/ext4/extents.c281
-rw-r--r--fs/ext4/file.c10
-rw-r--r--fs/ext4/fsync.c7
-rw-r--r--fs/ext4/hash.c8
-rw-r--r--fs/ext4/ialloc.c71
-rw-r--r--fs/ext4/inode.c620
-rw-r--r--fs/ext4/ioctl.c96
-rw-r--r--fs/ext4/mballoc.c220
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/migrate.c10
-rw-r--r--fs/ext4/namei.c402
-rw-r--r--fs/ext4/resize.c33
-rw-r--r--fs/ext4/super.c315
-rw-r--r--fs/ext4/symlink.c8
-rw-r--r--fs/ext4/xattr.c14
-rw-r--r--fs/ext4/xattr.h12
25 files changed, 1533 insertions, 2336 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index ac6fa8ca0a2f..a8ff003a00f7 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -2,12 +2,12 @@
2# Makefile for the linux ext4-filesystem routines. 2# Makefile for the linux ext4-filesystem routines.
3# 3#
4 4
5obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o 5obj-$(CONFIG_EXT4_FS) += ext4.o
6 6
7ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ 7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o 9 ext4_jbd2.o migrate.o mballoc.o
10 10
11ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 11ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
12ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o 12ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
13ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o 13ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index cd2b855a07d6..cb45257a246e 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t size)
51 } 51 }
52} 52}
53 53
54#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl 56/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
57 if the ACL has not been cached */ 57 if the ACL has not been cached */
58#define EXT4_ACL_NOT_CACHED ((void *)-1) 58#define EXT4_ACL_NOT_CACHED ((void *)-1)
59 59
60/* acl.c */ 60/* acl.c */
61extern int ext4_permission (struct inode *, int); 61extern int ext4_permission(struct inode *, int);
62extern int ext4_acl_chmod (struct inode *); 62extern int ext4_acl_chmod(struct inode *);
63extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); 63extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
64 64
65#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ 65#else /* CONFIG_EXT4_FS_POSIX_ACL */
66#include <linux/sched.h> 66#include <linux/sched.h>
67#define ext4_permission NULL 67#define ext4_permission NULL
68 68
@@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
77{ 77{
78 return 0; 78 return 0;
79} 79}
80#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ 80#endif /* CONFIG_EXT4_FS_POSIX_ACL */
81 81
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e9fa960ba6da..bd2ece228827 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -83,6 +83,7 @@ static int ext4_group_used_meta_blocks(struct super_block *sb,
83 } 83 }
84 return used_blocks; 84 return used_blocks;
85} 85}
86
86/* Initializes an uninitialized block bitmap if given, and returns the 87/* Initializes an uninitialized block bitmap if given, and returns the
87 * number of blocks free in the group. */ 88 * number of blocks free in the group. */
88unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 89unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
@@ -132,7 +133,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
132 */ 133 */
133 group_blocks = ext4_blocks_count(sbi->s_es) - 134 group_blocks = ext4_blocks_count(sbi->s_es) -
134 le32_to_cpu(sbi->s_es->s_first_data_block) - 135 le32_to_cpu(sbi->s_es->s_first_data_block) -
135 (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); 136 (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
136 } else { 137 } else {
137 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 138 group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
138 } 139 }
@@ -200,20 +201,20 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
200 * @bh: pointer to the buffer head to store the block 201 * @bh: pointer to the buffer head to store the block
201 * group descriptor 202 * group descriptor
202 */ 203 */
203struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 204struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
204 ext4_group_t block_group, 205 ext4_group_t block_group,
205 struct buffer_head ** bh) 206 struct buffer_head **bh)
206{ 207{
207 unsigned long group_desc; 208 unsigned long group_desc;
208 unsigned long offset; 209 unsigned long offset;
209 struct ext4_group_desc * desc; 210 struct ext4_group_desc *desc;
210 struct ext4_sb_info *sbi = EXT4_SB(sb); 211 struct ext4_sb_info *sbi = EXT4_SB(sb);
211 212
212 if (block_group >= sbi->s_groups_count) { 213 if (block_group >= sbi->s_groups_count) {
213 ext4_error (sb, "ext4_get_group_desc", 214 ext4_error(sb, "ext4_get_group_desc",
214 "block_group >= groups_count - " 215 "block_group >= groups_count - "
215 "block_group = %lu, groups_count = %lu", 216 "block_group = %lu, groups_count = %lu",
216 block_group, sbi->s_groups_count); 217 block_group, sbi->s_groups_count);
217 218
218 return NULL; 219 return NULL;
219 } 220 }
@@ -222,10 +223,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
222 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); 223 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
223 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); 224 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
224 if (!sbi->s_group_desc[group_desc]) { 225 if (!sbi->s_group_desc[group_desc]) {
225 ext4_error (sb, "ext4_get_group_desc", 226 ext4_error(sb, "ext4_get_group_desc",
226 "Group descriptor not loaded - " 227 "Group descriptor not loaded - "
227 "block_group = %lu, group_desc = %lu, desc = %lu", 228 "block_group = %lu, group_desc = %lu, desc = %lu",
228 block_group, group_desc, offset); 229 block_group, group_desc, offset);
229 return NULL; 230 return NULL;
230 } 231 }
231 232
@@ -302,8 +303,8 @@ err_out:
302struct buffer_head * 303struct buffer_head *
303ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) 304ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
304{ 305{
305 struct ext4_group_desc * desc; 306 struct ext4_group_desc *desc;
306 struct buffer_head * bh = NULL; 307 struct buffer_head *bh = NULL;
307 ext4_fsblk_t bitmap_blk; 308 ext4_fsblk_t bitmap_blk;
308 309
309 desc = ext4_get_group_desc(sb, block_group, NULL); 310 desc = ext4_get_group_desc(sb, block_group, NULL);
@@ -318,9 +319,11 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
318 block_group, bitmap_blk); 319 block_group, bitmap_blk);
319 return NULL; 320 return NULL;
320 } 321 }
321 if (bh_uptodate_or_lock(bh)) 322 if (buffer_uptodate(bh) &&
323 !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
322 return bh; 324 return bh;
323 325
326 lock_buffer(bh);
324 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 327 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
325 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 328 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
326 ext4_init_block_bitmap(sb, bh, block_group, desc); 329 ext4_init_block_bitmap(sb, bh, block_group, desc);
@@ -345,301 +348,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
345 */ 348 */
346 return bh; 349 return bh;
347} 350}
348/*
349 * The reservation window structure operations
350 * --------------------------------------------
351 * Operations include:
352 * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
353 *
354 * We use a red-black tree to represent per-filesystem reservation
355 * windows.
356 *
357 */
358
359/**
360 * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
361 * @rb_root: root of per-filesystem reservation rb tree
362 * @verbose: verbose mode
363 * @fn: function which wishes to dump the reservation map
364 *
365 * If verbose is turned on, it will print the whole block reservation
366 * windows(start, end). Otherwise, it will only print out the "bad" windows,
367 * those windows that overlap with their immediate neighbors.
368 */
369#if 1
370static void __rsv_window_dump(struct rb_root *root, int verbose,
371 const char *fn)
372{
373 struct rb_node *n;
374 struct ext4_reserve_window_node *rsv, *prev;
375 int bad;
376
377restart:
378 n = rb_first(root);
379 bad = 0;
380 prev = NULL;
381
382 printk("Block Allocation Reservation Windows Map (%s):\n", fn);
383 while (n) {
384 rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
385 if (verbose)
386 printk("reservation window 0x%p "
387 "start: %llu, end: %llu\n",
388 rsv, rsv->rsv_start, rsv->rsv_end);
389 if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
390 printk("Bad reservation %p (start >= end)\n",
391 rsv);
392 bad = 1;
393 }
394 if (prev && prev->rsv_end >= rsv->rsv_start) {
395 printk("Bad reservation %p (prev->end >= start)\n",
396 rsv);
397 bad = 1;
398 }
399 if (bad) {
400 if (!verbose) {
401 printk("Restarting reservation walk in verbose mode\n");
402 verbose = 1;
403 goto restart;
404 }
405 }
406 n = rb_next(n);
407 prev = rsv;
408 }
409 printk("Window map complete.\n");
410 BUG_ON(bad);
411}
412#define rsv_window_dump(root, verbose) \
413 __rsv_window_dump((root), (verbose), __func__)
414#else
415#define rsv_window_dump(root, verbose) do {} while (0)
416#endif
417
418/**
419 * goal_in_my_reservation()
420 * @rsv: inode's reservation window
421 * @grp_goal: given goal block relative to the allocation block group
422 * @group: the current allocation block group
423 * @sb: filesystem super block
424 *
425 * Test if the given goal block (group relative) is within the file's
426 * own block reservation window range.
427 *
428 * If the reservation window is outside the goal allocation group, return 0;
429 * grp_goal (given goal block) could be -1, which means no specific
430 * goal block. In this case, always return 1.
431 * If the goal block is within the reservation window, return 1;
432 * otherwise, return 0;
433 */
434static int
435goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
436 ext4_group_t group, struct super_block *sb)
437{
438 ext4_fsblk_t group_first_block, group_last_block;
439
440 group_first_block = ext4_group_first_block_no(sb, group);
441 group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
442
443 if ((rsv->_rsv_start > group_last_block) ||
444 (rsv->_rsv_end < group_first_block))
445 return 0;
446 if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
447 || (grp_goal + group_first_block > rsv->_rsv_end)))
448 return 0;
449 return 1;
450}
451
452/**
453 * search_reserve_window()
454 * @rb_root: root of reservation tree
455 * @goal: target allocation block
456 *
457 * Find the reserved window which includes the goal, or the previous one
458 * if the goal is not in any window.
459 * Returns NULL if there are no windows or if all windows start after the goal.
460 */
461static struct ext4_reserve_window_node *
462search_reserve_window(struct rb_root *root, ext4_fsblk_t goal)
463{
464 struct rb_node *n = root->rb_node;
465 struct ext4_reserve_window_node *rsv;
466
467 if (!n)
468 return NULL;
469
470 do {
471 rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
472
473 if (goal < rsv->rsv_start)
474 n = n->rb_left;
475 else if (goal > rsv->rsv_end)
476 n = n->rb_right;
477 else
478 return rsv;
479 } while (n);
480 /*
481 * We've fallen off the end of the tree: the goal wasn't inside
482 * any particular node. OK, the previous node must be to one
483 * side of the interval containing the goal. If it's the RHS,
484 * we need to back up one.
485 */
486 if (rsv->rsv_start > goal) {
487 n = rb_prev(&rsv->rsv_node);
488 rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
489 }
490 return rsv;
491}
492
493/**
494 * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree.
495 * @sb: super block
496 * @rsv: reservation window to add
497 *
498 * Must be called with rsv_lock hold.
499 */
500void ext4_rsv_window_add(struct super_block *sb,
501 struct ext4_reserve_window_node *rsv)
502{
503 struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root;
504 struct rb_node *node = &rsv->rsv_node;
505 ext4_fsblk_t start = rsv->rsv_start;
506
507 struct rb_node ** p = &root->rb_node;
508 struct rb_node * parent = NULL;
509 struct ext4_reserve_window_node *this;
510
511 while (*p)
512 {
513 parent = *p;
514 this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node);
515
516 if (start < this->rsv_start)
517 p = &(*p)->rb_left;
518 else if (start > this->rsv_end)
519 p = &(*p)->rb_right;
520 else {
521 rsv_window_dump(root, 1);
522 BUG();
523 }
524 }
525
526 rb_link_node(node, parent, p);
527 rb_insert_color(node, root);
528}
529
530/**
531 * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree
532 * @sb: super block
533 * @rsv: reservation window to remove
534 *
535 * Mark the block reservation window as not allocated, and unlink it
536 * from the filesystem reservation window rb tree. Must be called with
537 * rsv_lock hold.
538 */
539static void rsv_window_remove(struct super_block *sb,
540 struct ext4_reserve_window_node *rsv)
541{
542 rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
543 rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
544 rsv->rsv_alloc_hit = 0;
545 rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root);
546}
547
548/*
549 * rsv_is_empty() -- Check if the reservation window is allocated.
550 * @rsv: given reservation window to check
551 *
552 * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
553 */
554static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
555{
556 /* a valid reservation end block could not be 0 */
557 return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
558}
559
560/**
561 * ext4_init_block_alloc_info()
562 * @inode: file inode structure
563 *
564 * Allocate and initialize the reservation window structure, and
565 * link the window to the ext4 inode structure at last
566 *
567 * The reservation window structure is only dynamically allocated
568 * and linked to ext4 inode the first time the open file
569 * needs a new block. So, before every ext4_new_block(s) call, for
570 * regular files, we should check whether the reservation window
571 * structure exists or not. In the latter case, this function is called.
572 * Fail to do so will result in block reservation being turned off for that
573 * open file.
574 *
575 * This function is called from ext4_get_blocks_handle(), also called
576 * when setting the reservation window size through ioctl before the file
577 * is open for write (needs block allocation).
578 *
579 * Needs down_write(i_data_sem) protection prior to call this function.
580 */
581void ext4_init_block_alloc_info(struct inode *inode)
582{
583 struct ext4_inode_info *ei = EXT4_I(inode);
584 struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
585 struct super_block *sb = inode->i_sb;
586
587 block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
588 if (block_i) {
589 struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node;
590
591 rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
592 rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
593
594 /*
595 * if filesystem is mounted with NORESERVATION, the goal
596 * reservation window size is set to zero to indicate
597 * block reservation is off
598 */
599 if (!test_opt(sb, RESERVATION))
600 rsv->rsv_goal_size = 0;
601 else
602 rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS;
603 rsv->rsv_alloc_hit = 0;
604 block_i->last_alloc_logical_block = 0;
605 block_i->last_alloc_physical_block = 0;
606 }
607 ei->i_block_alloc_info = block_i;
608}
609
610/**
611 * ext4_discard_reservation()
612 * @inode: inode
613 *
614 * Discard(free) block reservation window on last file close, or truncate
615 * or at last iput().
616 *
617 * It is being called in three cases:
618 * ext4_release_file(): last writer close the file
619 * ext4_clear_inode(): last iput(), when nobody link to this file.
620 * ext4_truncate(): when the block indirect map is about to change.
621 *
622 */
623void ext4_discard_reservation(struct inode *inode)
624{
625 struct ext4_inode_info *ei = EXT4_I(inode);
626 struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
627 struct ext4_reserve_window_node *rsv;
628 spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
629
630 ext4_mb_discard_inode_preallocations(inode);
631
632 if (!block_i)
633 return;
634
635 rsv = &block_i->rsv_window_node;
636 if (!rsv_is_empty(&rsv->rsv_window)) {
637 spin_lock(rsv_lock);
638 if (!rsv_is_empty(&rsv->rsv_window))
639 rsv_window_remove(inode->i_sb, rsv);
640 spin_unlock(rsv_lock);
641 }
642}
643 351
644/** 352/**
645 * ext4_free_blocks_sb() -- Free given blocks and update quota 353 * ext4_free_blocks_sb() -- Free given blocks and update quota
@@ -648,6 +356,13 @@ void ext4_discard_reservation(struct inode *inode)
648 * @block: start physcial block to free 356 * @block: start physcial block to free
649 * @count: number of blocks to free 357 * @count: number of blocks to free
650 * @pdquot_freed_blocks: pointer to quota 358 * @pdquot_freed_blocks: pointer to quota
359 *
360 * XXX This function is only used by the on-line resizing code, which
361 * should probably be fixed up to call the mballoc variant. There
362 * this needs to be cleaned up later; in fact, I'm not convinced this
363 * is 100% correct in the face of the mballoc code. The online resizing
364 * code needs to be fixed up to more tightly (and correctly) interlock
365 * with the mballoc code.
651 */ 366 */
652void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, 367void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
653 ext4_fsblk_t block, unsigned long count, 368 ext4_fsblk_t block, unsigned long count,
@@ -659,8 +374,8 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
659 ext4_grpblk_t bit; 374 ext4_grpblk_t bit;
660 unsigned long i; 375 unsigned long i;
661 unsigned long overflow; 376 unsigned long overflow;
662 struct ext4_group_desc * desc; 377 struct ext4_group_desc *desc;
663 struct ext4_super_block * es; 378 struct ext4_super_block *es;
664 struct ext4_sb_info *sbi; 379 struct ext4_sb_info *sbi;
665 int err = 0, ret; 380 int err = 0, ret;
666 ext4_grpblk_t group_freed; 381 ext4_grpblk_t group_freed;
@@ -671,13 +386,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
671 if (block < le32_to_cpu(es->s_first_data_block) || 386 if (block < le32_to_cpu(es->s_first_data_block) ||
672 block + count < block || 387 block + count < block ||
673 block + count > ext4_blocks_count(es)) { 388 block + count > ext4_blocks_count(es)) {
674 ext4_error (sb, "ext4_free_blocks", 389 ext4_error(sb, "ext4_free_blocks",
675 "Freeing blocks not in datazone - " 390 "Freeing blocks not in datazone - "
676 "block = %llu, count = %lu", block, count); 391 "block = %llu, count = %lu", block, count);
677 goto error_return; 392 goto error_return;
678 } 393 }
679 394
680 ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); 395 ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1);
681 396
682do_more: 397do_more:
683 overflow = 0; 398 overflow = 0;
@@ -694,7 +409,7 @@ do_more:
694 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 409 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
695 if (!bitmap_bh) 410 if (!bitmap_bh)
696 goto error_return; 411 goto error_return;
697 desc = ext4_get_group_desc (sb, block_group, &gd_bh); 412 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
698 if (!desc) 413 if (!desc)
699 goto error_return; 414 goto error_return;
700 415
@@ -703,10 +418,10 @@ do_more:
703 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || 418 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
704 in_range(block + count - 1, ext4_inode_table(sb, desc), 419 in_range(block + count - 1, ext4_inode_table(sb, desc),
705 sbi->s_itb_per_group)) { 420 sbi->s_itb_per_group)) {
706 ext4_error (sb, "ext4_free_blocks", 421 ext4_error(sb, "ext4_free_blocks",
707 "Freeing blocks in system zones - " 422 "Freeing blocks in system zones - "
708 "Block = %llu, count = %lu", 423 "Block = %llu, count = %lu",
709 block, count); 424 block, count);
710 goto error_return; 425 goto error_return;
711 } 426 }
712 427
@@ -848,7 +563,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
848 ext4_fsblk_t block, unsigned long count, 563 ext4_fsblk_t block, unsigned long count,
849 int metadata) 564 int metadata)
850{ 565{
851 struct super_block * sb; 566 struct super_block *sb;
852 unsigned long dquot_freed_blocks; 567 unsigned long dquot_freed_blocks;
853 568
854 /* this isn't the right place to decide whether block is metadata 569 /* this isn't the right place to decide whether block is metadata
@@ -859,748 +574,52 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
859 574
860 sb = inode->i_sb; 575 sb = inode->i_sb;
861 576
862 if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) 577 ext4_mb_free_blocks(handle, inode, block, count,
863 ext4_free_blocks_sb(handle, sb, block, count, 578 metadata, &dquot_freed_blocks);
864 &dquot_freed_blocks);
865 else
866 ext4_mb_free_blocks(handle, inode, block, count,
867 metadata, &dquot_freed_blocks);
868 if (dquot_freed_blocks) 579 if (dquot_freed_blocks)
869 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); 580 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
870 return; 581 return;
871} 582}
872 583
873/** 584int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
874 * ext4_test_allocatable() 585 s64 nblocks)
875 * @nr: given allocation block group
876 * @bh: bufferhead contains the bitmap of the given block group
877 *
878 * For ext4 allocations, we must not reuse any blocks which are
879 * allocated in the bitmap buffer's "last committed data" copy. This
880 * prevents deletes from freeing up the page for reuse until we have
881 * committed the delete transaction.
882 *
883 * If we didn't do this, then deleting something and reallocating it as
884 * data would allow the old block to be overwritten before the
885 * transaction committed (because we force data to disk before commit).
886 * This would lead to corruption if we crashed between overwriting the
887 * data and committing the delete.
888 *
889 * @@@ We may want to make this allocation behaviour conditional on
890 * data-writes at some point, and disable it for metadata allocations or
891 * sync-data inodes.
892 */
893static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh)
894{
895 int ret;
896 struct journal_head *jh = bh2jh(bh);
897
898 if (ext4_test_bit(nr, bh->b_data))
899 return 0;
900
901 jbd_lock_bh_state(bh);
902 if (!jh->b_committed_data)
903 ret = 1;
904 else
905 ret = !ext4_test_bit(nr, jh->b_committed_data);
906 jbd_unlock_bh_state(bh);
907 return ret;
908}
909
910/**
911 * bitmap_search_next_usable_block()
912 * @start: the starting block (group relative) of the search
913 * @bh: bufferhead contains the block group bitmap
914 * @maxblocks: the ending block (group relative) of the reservation
915 *
916 * The bitmap search --- search forward alternately through the actual
917 * bitmap on disk and the last-committed copy in journal, until we find a
918 * bit free in both bitmaps.
919 */
920static ext4_grpblk_t
921bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
922 ext4_grpblk_t maxblocks)
923{ 586{
924 ext4_grpblk_t next; 587 s64 free_blocks, dirty_blocks;
925 struct journal_head *jh = bh2jh(bh); 588 s64 root_blocks = 0;
926 589 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
927 while (start < maxblocks) { 590 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
928 next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start);
929 if (next >= maxblocks)
930 return -1;
931 if (ext4_test_allocatable(next, bh))
932 return next;
933 jbd_lock_bh_state(bh);
934 if (jh->b_committed_data)
935 start = ext4_find_next_zero_bit(jh->b_committed_data,
936 maxblocks, next);
937 jbd_unlock_bh_state(bh);
938 }
939 return -1;
940}
941 591
942/** 592 free_blocks = percpu_counter_read_positive(fbc);
943 * find_next_usable_block() 593 dirty_blocks = percpu_counter_read_positive(dbc);
944 * @start: the starting block (group relative) to find next
945 * allocatable block in bitmap.
946 * @bh: bufferhead contains the block group bitmap
947 * @maxblocks: the ending block (group relative) for the search
948 *
949 * Find an allocatable block in a bitmap. We honor both the bitmap and
950 * its last-committed copy (if that exists), and perform the "most
951 * appropriate allocation" algorithm of looking for a free block near
952 * the initial goal; then for a free byte somewhere in the bitmap; then
953 * for any free bit in the bitmap.
954 */
955static ext4_grpblk_t
956find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
957 ext4_grpblk_t maxblocks)
958{
959 ext4_grpblk_t here, next;
960 char *p, *r;
961
962 if (start > 0) {
963 /*
964 * The goal was occupied; search forward for a free
965 * block within the next XX blocks.
966 *
967 * end_goal is more or less random, but it has to be
968 * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the
969 * next 64-bit boundary is simple..
970 */
971 ext4_grpblk_t end_goal = (start + 63) & ~63;
972 if (end_goal > maxblocks)
973 end_goal = maxblocks;
974 here = ext4_find_next_zero_bit(bh->b_data, end_goal, start);
975 if (here < end_goal && ext4_test_allocatable(here, bh))
976 return here;
977 ext4_debug("Bit not found near goal\n");
978 }
979
980 here = start;
981 if (here < 0)
982 here = 0;
983
984 p = ((char *)bh->b_data) + (here >> 3);
985 r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
986 next = (r - ((char *)bh->b_data)) << 3;
987
988 if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh))
989 return next;
990
991 /*
992 * The bitmap search --- search forward alternately through the actual
993 * bitmap and the last-committed copy until we find a bit free in
994 * both
995 */
996 here = bitmap_search_next_usable_block(here, bh, maxblocks);
997 return here;
998}
999
1000/**
1001 * claim_block()
1002 * @block: the free block (group relative) to allocate
1003 * @bh: the bufferhead containts the block group bitmap
1004 *
1005 * We think we can allocate this block in this bitmap. Try to set the bit.
1006 * If that succeeds then check that nobody has allocated and then freed the
1007 * block since we saw that is was not marked in b_committed_data. If it _was_
1008 * allocated and freed then clear the bit in the bitmap again and return
1009 * zero (failure).
1010 */
1011static inline int
1012claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh)
1013{
1014 struct journal_head *jh = bh2jh(bh);
1015 int ret;
1016
1017 if (ext4_set_bit_atomic(lock, block, bh->b_data))
1018 return 0;
1019 jbd_lock_bh_state(bh);
1020 if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) {
1021 ext4_clear_bit_atomic(lock, block, bh->b_data);
1022 ret = 0;
1023 } else {
1024 ret = 1;
1025 }
1026 jbd_unlock_bh_state(bh);
1027 return ret;
1028}
1029 594
1030/** 595 if (!capable(CAP_SYS_RESOURCE) &&
1031 * ext4_try_to_allocate() 596 sbi->s_resuid != current->fsuid &&
1032 * @sb: superblock 597 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
1033 * @handle: handle to this transaction 598 root_blocks = ext4_r_blocks_count(sbi->s_es);
1034 * @group: given allocation block group
1035 * @bitmap_bh: bufferhead holds the block bitmap
1036 * @grp_goal: given target block within the group
1037 * @count: target number of blocks to allocate
1038 * @my_rsv: reservation window
1039 *
1040 * Attempt to allocate blocks within a give range. Set the range of allocation
1041 * first, then find the first free bit(s) from the bitmap (within the range),
1042 * and at last, allocate the blocks by claiming the found free bit as allocated.
1043 *
1044 * To set the range of this allocation:
1045 * if there is a reservation window, only try to allocate block(s) from the
1046 * file's own reservation window;
1047 * Otherwise, the allocation range starts from the give goal block, ends at
1048 * the block group's last block.
1049 *
1050 * If we failed to allocate the desired block then we may end up crossing to a
1051 * new bitmap. In that case we must release write access to the old one via
1052 * ext4_journal_release_buffer(), else we'll run out of credits.
1053 */
1054static ext4_grpblk_t
1055ext4_try_to_allocate(struct super_block *sb, handle_t *handle,
1056 ext4_group_t group, struct buffer_head *bitmap_bh,
1057 ext4_grpblk_t grp_goal, unsigned long *count,
1058 struct ext4_reserve_window *my_rsv)
1059{
1060 ext4_fsblk_t group_first_block;
1061 ext4_grpblk_t start, end;
1062 unsigned long num = 0;
1063
1064 /* we do allocation within the reservation window if we have a window */
1065 if (my_rsv) {
1066 group_first_block = ext4_group_first_block_no(sb, group);
1067 if (my_rsv->_rsv_start >= group_first_block)
1068 start = my_rsv->_rsv_start - group_first_block;
1069 else
1070 /* reservation window cross group boundary */
1071 start = 0;
1072 end = my_rsv->_rsv_end - group_first_block + 1;
1073 if (end > EXT4_BLOCKS_PER_GROUP(sb))
1074 /* reservation window crosses group boundary */
1075 end = EXT4_BLOCKS_PER_GROUP(sb);
1076 if ((start <= grp_goal) && (grp_goal < end))
1077 start = grp_goal;
1078 else
1079 grp_goal = -1;
1080 } else {
1081 if (grp_goal > 0)
1082 start = grp_goal;
1083 else
1084 start = 0;
1085 end = EXT4_BLOCKS_PER_GROUP(sb);
1086 }
1087
1088 BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb));
1089
1090repeat:
1091 if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) {
1092 grp_goal = find_next_usable_block(start, bitmap_bh, end);
1093 if (grp_goal < 0)
1094 goto fail_access;
1095 if (!my_rsv) {
1096 int i;
1097
1098 for (i = 0; i < 7 && grp_goal > start &&
1099 ext4_test_allocatable(grp_goal - 1,
1100 bitmap_bh);
1101 i++, grp_goal--)
1102 ;
1103 }
1104 }
1105 start = grp_goal;
1106
1107 if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group),
1108 grp_goal, bitmap_bh)) {
1109 /*
1110 * The block was allocated by another thread, or it was
1111 * allocated and then freed by another thread
1112 */
1113 start++;
1114 grp_goal++;
1115 if (start >= end)
1116 goto fail_access;
1117 goto repeat;
1118 }
1119 num++;
1120 grp_goal++;
1121 while (num < *count && grp_goal < end
1122 && ext4_test_allocatable(grp_goal, bitmap_bh)
1123 && claim_block(sb_bgl_lock(EXT4_SB(sb), group),
1124 grp_goal, bitmap_bh)) {
1125 num++;
1126 grp_goal++;
1127 }
1128 *count = num;
1129 return grp_goal - num;
1130fail_access:
1131 *count = num;
1132 return -1;
1133}
1134
1135/**
1136 * find_next_reservable_window():
1137 * find a reservable space within the given range.
1138 * It does not allocate the reservation window for now:
1139 * alloc_new_reservation() will do the work later.
1140 *
1141 * @search_head: the head of the searching list;
1142 * This is not necessarily the list head of the whole filesystem
1143 *
1144 * We have both head and start_block to assist the search
1145 * for the reservable space. The list starts from head,
1146 * but we will shift to the place where start_block is,
1147 * then start from there, when looking for a reservable space.
1148 *
1149 * @size: the target new reservation window size
1150 *
1151 * @group_first_block: the first block we consider to start
1152 * the real search from
1153 *
1154 * @last_block:
1155 * the maximum block number that our goal reservable space
1156 * could start from. This is normally the last block in this
1157 * group. The search will end when we found the start of next
1158 * possible reservable space is out of this boundary.
1159 * This could handle the cross boundary reservation window
1160 * request.
1161 *
1162 * basically we search from the given range, rather than the whole
1163 * reservation double linked list, (start_block, last_block)
1164 * to find a free region that is of my size and has not
1165 * been reserved.
1166 *
1167 */
1168static int find_next_reservable_window(
1169 struct ext4_reserve_window_node *search_head,
1170 struct ext4_reserve_window_node *my_rsv,
1171 struct super_block * sb,
1172 ext4_fsblk_t start_block,
1173 ext4_fsblk_t last_block)
1174{
1175 struct rb_node *next;
1176 struct ext4_reserve_window_node *rsv, *prev;
1177 ext4_fsblk_t cur;
1178 int size = my_rsv->rsv_goal_size;
1179
1180 /* TODO: make the start of the reservation window byte-aligned */
1181 /* cur = *start_block & ~7;*/
1182 cur = start_block;
1183 rsv = search_head;
1184 if (!rsv)
1185 return -1;
1186
1187 while (1) {
1188 if (cur <= rsv->rsv_end)
1189 cur = rsv->rsv_end + 1;
1190
1191 /* TODO?
1192 * in the case we could not find a reservable space
1193 * that is what is expected, during the re-search, we could
1194 * remember what's the largest reservable space we could have
1195 * and return that one.
1196 *
1197 * For now it will fail if we could not find the reservable
1198 * space with expected-size (or more)...
1199 */
1200 if (cur > last_block)
1201 return -1; /* fail */
1202
1203 prev = rsv;
1204 next = rb_next(&rsv->rsv_node);
1205 rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node);
1206 599
1207 /* 600 if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
1208 * Reached the last reservation, we can just append to the 601 EXT4_FREEBLOCKS_WATERMARK) {
1209 * previous one. 602 free_blocks = percpu_counter_sum(fbc);
1210 */ 603 dirty_blocks = percpu_counter_sum(dbc);
1211 if (!next) 604 if (dirty_blocks < 0) {
1212 break; 605 printk(KERN_CRIT "Dirty block accounting "
1213 606 "went wrong %lld\n",
1214 if (cur + size <= rsv->rsv_start) { 607 dirty_blocks);
1215 /*
1216 * Found a reserveable space big enough. We could
1217 * have a reservation across the group boundary here
1218 */
1219 break;
1220 } 608 }
1221 } 609 }
1222 /* 610 /* Check whether we have space after
1223 * we come here either : 611 * accounting for current dirty blocks
1224 * when we reach the end of the whole list,
1225 * and there is empty reservable space after last entry in the list.
1226 * append it to the end of the list.
1227 *
1228 * or we found one reservable space in the middle of the list,
1229 * return the reservation window that we could append to.
1230 * succeed.
1231 */ 612 */
613 if (free_blocks < ((root_blocks + nblocks) + dirty_blocks))
614 /* we don't have free space */
615 return -ENOSPC;
1232 616
1233 if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) 617 /* Add the blocks to nblocks */
1234 rsv_window_remove(sb, my_rsv); 618 percpu_counter_add(dbc, nblocks);
1235
1236 /*
1237 * Let's book the whole avaliable window for now. We will check the
1238 * disk bitmap later and then, if there are free blocks then we adjust
1239 * the window size if it's larger than requested.
1240 * Otherwise, we will remove this node from the tree next time
1241 * call find_next_reservable_window.
1242 */
1243 my_rsv->rsv_start = cur;
1244 my_rsv->rsv_end = cur + size - 1;
1245 my_rsv->rsv_alloc_hit = 0;
1246
1247 if (prev != my_rsv)
1248 ext4_rsv_window_add(sb, my_rsv);
1249
1250 return 0; 619 return 0;
1251} 620}
1252 621
1253/** 622/**
1254 * alloc_new_reservation()--allocate a new reservation window
1255 *
1256 * To make a new reservation, we search part of the filesystem
1257 * reservation list (the list that inside the group). We try to
1258 * allocate a new reservation window near the allocation goal,
1259 * or the beginning of the group, if there is no goal.
1260 *
1261 * We first find a reservable space after the goal, then from
1262 * there, we check the bitmap for the first free block after
1263 * it. If there is no free block until the end of group, then the
1264 * whole group is full, we failed. Otherwise, check if the free
1265 * block is inside the expected reservable space, if so, we
1266 * succeed.
1267 * If the first free block is outside the reservable space, then
1268 * start from the first free block, we search for next available
1269 * space, and go on.
1270 *
1271 * on succeed, a new reservation will be found and inserted into the list
1272 * It contains at least one free block, and it does not overlap with other
1273 * reservation windows.
1274 *
1275 * failed: we failed to find a reservation window in this group
1276 *
1277 * @rsv: the reservation
1278 *
1279 * @grp_goal: The goal (group-relative). It is where the search for a
1280 * free reservable space should start from.
1281 * if we have a grp_goal(grp_goal >0 ), then start from there,
1282 * no grp_goal(grp_goal = -1), we start from the first block
1283 * of the group.
1284 *
1285 * @sb: the super block
1286 * @group: the group we are trying to allocate in
1287 * @bitmap_bh: the block group block bitmap
1288 *
1289 */
1290static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
1291 ext4_grpblk_t grp_goal, struct super_block *sb,
1292 ext4_group_t group, struct buffer_head *bitmap_bh)
1293{
1294 struct ext4_reserve_window_node *search_head;
1295 ext4_fsblk_t group_first_block, group_end_block, start_block;
1296 ext4_grpblk_t first_free_block;
1297 struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root;
1298 unsigned long size;
1299 int ret;
1300 spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
1301
1302 group_first_block = ext4_group_first_block_no(sb, group);
1303 group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1304
1305 if (grp_goal < 0)
1306 start_block = group_first_block;
1307 else
1308 start_block = grp_goal + group_first_block;
1309
1310 size = my_rsv->rsv_goal_size;
1311
1312 if (!rsv_is_empty(&my_rsv->rsv_window)) {
1313 /*
1314 * if the old reservation is cross group boundary
1315 * and if the goal is inside the old reservation window,
1316 * we will come here when we just failed to allocate from
1317 * the first part of the window. We still have another part
1318 * that belongs to the next group. In this case, there is no
1319 * point to discard our window and try to allocate a new one
1320 * in this group(which will fail). we should
1321 * keep the reservation window, just simply move on.
1322 *
1323 * Maybe we could shift the start block of the reservation
1324 * window to the first block of next group.
1325 */
1326
1327 if ((my_rsv->rsv_start <= group_end_block) &&
1328 (my_rsv->rsv_end > group_end_block) &&
1329 (start_block >= my_rsv->rsv_start))
1330 return -1;
1331
1332 if ((my_rsv->rsv_alloc_hit >
1333 (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
1334 /*
1335 * if the previously allocation hit ratio is
1336 * greater than 1/2, then we double the size of
1337 * the reservation window the next time,
1338 * otherwise we keep the same size window
1339 */
1340 size = size * 2;
1341 if (size > EXT4_MAX_RESERVE_BLOCKS)
1342 size = EXT4_MAX_RESERVE_BLOCKS;
1343 my_rsv->rsv_goal_size= size;
1344 }
1345 }
1346
1347 spin_lock(rsv_lock);
1348 /*
1349 * shift the search start to the window near the goal block
1350 */
1351 search_head = search_reserve_window(fs_rsv_root, start_block);
1352
1353 /*
1354 * find_next_reservable_window() simply finds a reservable window
1355 * inside the given range(start_block, group_end_block).
1356 *
1357 * To make sure the reservation window has a free bit inside it, we
1358 * need to check the bitmap after we found a reservable window.
1359 */
1360retry:
1361 ret = find_next_reservable_window(search_head, my_rsv, sb,
1362 start_block, group_end_block);
1363
1364 if (ret == -1) {
1365 if (!rsv_is_empty(&my_rsv->rsv_window))
1366 rsv_window_remove(sb, my_rsv);
1367 spin_unlock(rsv_lock);
1368 return -1;
1369 }
1370
1371 /*
1372 * On success, find_next_reservable_window() returns the
1373 * reservation window where there is a reservable space after it.
1374 * Before we reserve this reservable space, we need
1375 * to make sure there is at least a free block inside this region.
1376 *
1377 * searching the first free bit on the block bitmap and copy of
1378 * last committed bitmap alternatively, until we found a allocatable
1379 * block. Search start from the start block of the reservable space
1380 * we just found.
1381 */
1382 spin_unlock(rsv_lock);
1383 first_free_block = bitmap_search_next_usable_block(
1384 my_rsv->rsv_start - group_first_block,
1385 bitmap_bh, group_end_block - group_first_block + 1);
1386
1387 if (first_free_block < 0) {
1388 /*
1389 * no free block left on the bitmap, no point
1390 * to reserve the space. return failed.
1391 */
1392 spin_lock(rsv_lock);
1393 if (!rsv_is_empty(&my_rsv->rsv_window))
1394 rsv_window_remove(sb, my_rsv);
1395 spin_unlock(rsv_lock);
1396 return -1; /* failed */
1397 }
1398
1399 start_block = first_free_block + group_first_block;
1400 /*
1401 * check if the first free block is within the
1402 * free space we just reserved
1403 */
1404 if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
1405 return 0; /* success */
1406 /*
1407 * if the first free bit we found is out of the reservable space
1408 * continue search for next reservable space,
1409 * start from where the free block is,
1410 * we also shift the list head to where we stopped last time
1411 */
1412 search_head = my_rsv;
1413 spin_lock(rsv_lock);
1414 goto retry;
1415}
1416
1417/**
1418 * try_to_extend_reservation()
1419 * @my_rsv: given reservation window
1420 * @sb: super block
1421 * @size: the delta to extend
1422 *
1423 * Attempt to expand the reservation window large enough to have
1424 * required number of free blocks
1425 *
1426 * Since ext4_try_to_allocate() will always allocate blocks within
1427 * the reservation window range, if the window size is too small,
1428 * multiple blocks allocation has to stop at the end of the reservation
1429 * window. To make this more efficient, given the total number of
1430 * blocks needed and the current size of the window, we try to
1431 * expand the reservation window size if necessary on a best-effort
1432 * basis before ext4_new_blocks() tries to allocate blocks,
1433 */
1434static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
1435 struct super_block *sb, int size)
1436{
1437 struct ext4_reserve_window_node *next_rsv;
1438 struct rb_node *next;
1439 spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
1440
1441 if (!spin_trylock(rsv_lock))
1442 return;
1443
1444 next = rb_next(&my_rsv->rsv_node);
1445
1446 if (!next)
1447 my_rsv->rsv_end += size;
1448 else {
1449 next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node);
1450
1451 if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
1452 my_rsv->rsv_end += size;
1453 else
1454 my_rsv->rsv_end = next_rsv->rsv_start - 1;
1455 }
1456 spin_unlock(rsv_lock);
1457}
1458
1459/**
1460 * ext4_try_to_allocate_with_rsv()
1461 * @sb: superblock
1462 * @handle: handle to this transaction
1463 * @group: given allocation block group
1464 * @bitmap_bh: bufferhead holds the block bitmap
1465 * @grp_goal: given target block within the group
1466 * @count: target number of blocks to allocate
1467 * @my_rsv: reservation window
1468 * @errp: pointer to store the error code
1469 *
1470 * This is the main function used to allocate a new block and its reservation
1471 * window.
1472 *
1473 * Each time when a new block allocation is need, first try to allocate from
1474 * its own reservation. If it does not have a reservation window, instead of
1475 * looking for a free bit on bitmap first, then look up the reservation list to
1476 * see if it is inside somebody else's reservation window, we try to allocate a
1477 * reservation window for it starting from the goal first. Then do the block
1478 * allocation within the reservation window.
1479 *
1480 * This will avoid keeping on searching the reservation list again and
1481 * again when somebody is looking for a free block (without
1482 * reservation), and there are lots of free blocks, but they are all
1483 * being reserved.
1484 *
1485 * We use a red-black tree for the per-filesystem reservation list.
1486 *
1487 */
1488static ext4_grpblk_t
1489ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1490 ext4_group_t group, struct buffer_head *bitmap_bh,
1491 ext4_grpblk_t grp_goal,
1492 struct ext4_reserve_window_node * my_rsv,
1493 unsigned long *count, int *errp)
1494{
1495 ext4_fsblk_t group_first_block, group_last_block;
1496 ext4_grpblk_t ret = 0;
1497 int fatal;
1498 unsigned long num = *count;
1499
1500 *errp = 0;
1501
1502 /*
1503 * Make sure we use undo access for the bitmap, because it is critical
1504 * that we do the frozen_data COW on bitmap buffers in all cases even
1505 * if the buffer is in BJ_Forget state in the committing transaction.
1506 */
1507 BUFFER_TRACE(bitmap_bh, "get undo access for new block");
1508 fatal = ext4_journal_get_undo_access(handle, bitmap_bh);
1509 if (fatal) {
1510 *errp = fatal;
1511 return -1;
1512 }
1513
1514 /*
1515 * we don't deal with reservation when
1516 * filesystem is mounted without reservation
1517 * or the file is not a regular file
1518 * or last attempt to allocate a block with reservation turned on failed
1519 */
1520 if (my_rsv == NULL ) {
1521 ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
1522 grp_goal, count, NULL);
1523 goto out;
1524 }
1525 /*
1526 * grp_goal is a group relative block number (if there is a goal)
1527 * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
1528 * first block is a filesystem wide block number
1529 * first block is the block number of the first block in this group
1530 */
1531 group_first_block = ext4_group_first_block_no(sb, group);
1532 group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1533
1534 /*
1535 * Basically we will allocate a new block from inode's reservation
1536 * window.
1537 *
1538 * We need to allocate a new reservation window, if:
1539 * a) inode does not have a reservation window; or
1540 * b) last attempt to allocate a block from existing reservation
1541 * failed; or
1542 * c) we come here with a goal and with a reservation window
1543 *
1544 * We do not need to allocate a new reservation window if we come here
1545 * at the beginning with a goal and the goal is inside the window, or
1546 * we don't have a goal but already have a reservation window.
1547 * then we could go to allocate from the reservation window directly.
1548 */
1549 while (1) {
1550 if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
1551 !goal_in_my_reservation(&my_rsv->rsv_window,
1552 grp_goal, group, sb)) {
1553 if (my_rsv->rsv_goal_size < *count)
1554 my_rsv->rsv_goal_size = *count;
1555 ret = alloc_new_reservation(my_rsv, grp_goal, sb,
1556 group, bitmap_bh);
1557 if (ret < 0)
1558 break; /* failed */
1559
1560 if (!goal_in_my_reservation(&my_rsv->rsv_window,
1561 grp_goal, group, sb))
1562 grp_goal = -1;
1563 } else if (grp_goal >= 0) {
1564 int curr = my_rsv->rsv_end -
1565 (grp_goal + group_first_block) + 1;
1566
1567 if (curr < *count)
1568 try_to_extend_reservation(my_rsv, sb,
1569 *count - curr);
1570 }
1571
1572 if ((my_rsv->rsv_start > group_last_block) ||
1573 (my_rsv->rsv_end < group_first_block)) {
1574 rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1);
1575 BUG();
1576 }
1577 ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
1578 grp_goal, &num, &my_rsv->rsv_window);
1579 if (ret >= 0) {
1580 my_rsv->rsv_alloc_hit += num;
1581 *count = num;
1582 break; /* succeed */
1583 }
1584 num = *count;
1585 }
1586out:
1587 if (ret >= 0) {
1588 BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
1589 "bitmap block");
1590 fatal = ext4_journal_dirty_metadata(handle, bitmap_bh);
1591 if (fatal) {
1592 *errp = fatal;
1593 return -1;
1594 }
1595 return ret;
1596 }
1597
1598 BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
1599 ext4_journal_release_buffer(handle, bitmap_bh);
1600 return ret;
1601}
1602
1603/**
1604 * ext4_has_free_blocks() 623 * ext4_has_free_blocks()
1605 * @sbi: in-core super block structure. 624 * @sbi: in-core super block structure.
1606 * @nblocks: number of neeed blocks 625 * @nblocks: number of neeed blocks
@@ -1610,29 +629,34 @@ out:
1610 * On success, return nblocks 629 * On success, return nblocks
1611 */ 630 */
1612ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, 631ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
1613 ext4_fsblk_t nblocks) 632 s64 nblocks)
1614{ 633{
1615 ext4_fsblk_t free_blocks; 634 s64 free_blocks, dirty_blocks;
1616 ext4_fsblk_t root_blocks = 0; 635 s64 root_blocks = 0;
636 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
637 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
1617 638
1618 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 639 free_blocks = percpu_counter_read_positive(fbc);
640 dirty_blocks = percpu_counter_read_positive(dbc);
1619 641
1620 if (!capable(CAP_SYS_RESOURCE) && 642 if (!capable(CAP_SYS_RESOURCE) &&
1621 sbi->s_resuid != current->fsuid && 643 sbi->s_resuid != current->fsuid &&
1622 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) 644 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
1623 root_blocks = ext4_r_blocks_count(sbi->s_es); 645 root_blocks = ext4_r_blocks_count(sbi->s_es);
1624#ifdef CONFIG_SMP 646
1625 if (free_blocks - root_blocks < FBC_BATCH) 647 if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
1626 free_blocks = 648 EXT4_FREEBLOCKS_WATERMARK) {
1627 percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); 649 free_blocks = percpu_counter_sum(fbc);
1628#endif 650 dirty_blocks = percpu_counter_sum(dbc);
1629 if (free_blocks <= root_blocks) 651 }
652 if (free_blocks <= (root_blocks + dirty_blocks))
1630 /* we don't have free space */ 653 /* we don't have free space */
1631 return 0; 654 return 0;
1632 if (free_blocks - root_blocks < nblocks) 655
1633 return free_blocks - root_blocks; 656 if (free_blocks - (root_blocks + dirty_blocks) < nblocks)
657 return free_blocks - (root_blocks + dirty_blocks);
1634 return nblocks; 658 return nblocks;
1635 } 659}
1636 660
1637 661
1638/** 662/**
@@ -1657,303 +681,6 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
1657 return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); 681 return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
1658} 682}
1659 683
1660/**
1661 * ext4_old_new_blocks() -- core block bitmap based block allocation function
1662 *
1663 * @handle: handle to this transaction
1664 * @inode: file inode
1665 * @goal: given target block(filesystem wide)
1666 * @count: target number of blocks to allocate
1667 * @errp: error code
1668 *
1669 * ext4_old_new_blocks uses a goal block to assist allocation and look up
1670 * the block bitmap directly to do block allocation. It tries to
1671 * allocate block(s) from the block group contains the goal block first. If
1672 * that fails, it will try to allocate block(s) from other block groups
1673 * without any specific goal block.
1674 *
1675 * This function is called when -o nomballoc mount option is enabled
1676 *
1677 */
1678ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
1679 ext4_fsblk_t goal, unsigned long *count, int *errp)
1680{
1681 struct buffer_head *bitmap_bh = NULL;
1682 struct buffer_head *gdp_bh;
1683 ext4_group_t group_no;
1684 ext4_group_t goal_group;
1685 ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */
1686 ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/
1687 ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */
1688 ext4_group_t bgi; /* blockgroup iteration index */
1689 int fatal = 0, err;
1690 int performed_allocation = 0;
1691 ext4_grpblk_t free_blocks; /* number of free blocks in a group */
1692 struct super_block *sb;
1693 struct ext4_group_desc *gdp;
1694 struct ext4_super_block *es;
1695 struct ext4_sb_info *sbi;
1696 struct ext4_reserve_window_node *my_rsv = NULL;
1697 struct ext4_block_alloc_info *block_i;
1698 unsigned short windowsz = 0;
1699 ext4_group_t ngroups;
1700 unsigned long num = *count;
1701
1702 sb = inode->i_sb;
1703 if (!sb) {
1704 *errp = -ENODEV;
1705 printk("ext4_new_block: nonexistent device");
1706 return 0;
1707 }
1708
1709 sbi = EXT4_SB(sb);
1710 if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
1711 /*
1712 * With delalloc we already reserved the blocks
1713 */
1714 *count = ext4_has_free_blocks(sbi, *count);
1715 }
1716 if (*count == 0) {
1717 *errp = -ENOSPC;
1718 return 0; /*return with ENOSPC error */
1719 }
1720 num = *count;
1721
1722 /*
1723 * Check quota for allocation of this block.
1724 */
1725 if (DQUOT_ALLOC_BLOCK(inode, num)) {
1726 *errp = -EDQUOT;
1727 return 0;
1728 }
1729
1730 sbi = EXT4_SB(sb);
1731 es = EXT4_SB(sb)->s_es;
1732 ext4_debug("goal=%llu.\n", goal);
1733 /*
1734 * Allocate a block from reservation only when
1735 * filesystem is mounted with reservation(default,-o reservation), and
1736 * it's a regular file, and
1737 * the desired window size is greater than 0 (One could use ioctl
1738 * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off
1739 * reservation on that particular file)
1740 */
1741 block_i = EXT4_I(inode)->i_block_alloc_info;
1742 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
1743 my_rsv = &block_i->rsv_window_node;
1744
1745 /*
1746 * First, test whether the goal block is free.
1747 */
1748 if (goal < le32_to_cpu(es->s_first_data_block) ||
1749 goal >= ext4_blocks_count(es))
1750 goal = le32_to_cpu(es->s_first_data_block);
1751 ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk);
1752 goal_group = group_no;
1753retry_alloc:
1754 gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
1755 if (!gdp)
1756 goto io_error;
1757
1758 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1759 /*
1760 * if there is not enough free blocks to make a new resevation
1761 * turn off reservation for this allocation
1762 */
1763 if (my_rsv && (free_blocks < windowsz)
1764 && (rsv_is_empty(&my_rsv->rsv_window)))
1765 my_rsv = NULL;
1766
1767 if (free_blocks > 0) {
1768 bitmap_bh = ext4_read_block_bitmap(sb, group_no);
1769 if (!bitmap_bh)
1770 goto io_error;
1771 grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
1772 group_no, bitmap_bh, grp_target_blk,
1773 my_rsv, &num, &fatal);
1774 if (fatal)
1775 goto out;
1776 if (grp_alloc_blk >= 0)
1777 goto allocated;
1778 }
1779
1780 ngroups = EXT4_SB(sb)->s_groups_count;
1781 smp_rmb();
1782
1783 /*
1784 * Now search the rest of the groups. We assume that
1785 * group_no and gdp correctly point to the last group visited.
1786 */
1787 for (bgi = 0; bgi < ngroups; bgi++) {
1788 group_no++;
1789 if (group_no >= ngroups)
1790 group_no = 0;
1791 gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
1792 if (!gdp)
1793 goto io_error;
1794 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1795 /*
1796 * skip this group if the number of
1797 * free blocks is less than half of the reservation
1798 * window size.
1799 */
1800 if (free_blocks <= (windowsz/2))
1801 continue;
1802
1803 brelse(bitmap_bh);
1804 bitmap_bh = ext4_read_block_bitmap(sb, group_no);
1805 if (!bitmap_bh)
1806 goto io_error;
1807 /*
1808 * try to allocate block(s) from this group, without a goal(-1).
1809 */
1810 grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
1811 group_no, bitmap_bh, -1, my_rsv,
1812 &num, &fatal);
1813 if (fatal)
1814 goto out;
1815 if (grp_alloc_blk >= 0)
1816 goto allocated;
1817 }
1818 /*
1819 * We may end up a bogus ealier ENOSPC error due to
1820 * filesystem is "full" of reservations, but
1821 * there maybe indeed free blocks avaliable on disk
1822 * In this case, we just forget about the reservations
1823 * just do block allocation as without reservations.
1824 */
1825 if (my_rsv) {
1826 my_rsv = NULL;
1827 windowsz = 0;
1828 group_no = goal_group;
1829 goto retry_alloc;
1830 }
1831 /* No space left on the device */
1832 *errp = -ENOSPC;
1833 goto out;
1834
1835allocated:
1836
1837 ext4_debug("using block group %lu(%d)\n",
1838 group_no, gdp->bg_free_blocks_count);
1839
1840 BUFFER_TRACE(gdp_bh, "get_write_access");
1841 fatal = ext4_journal_get_write_access(handle, gdp_bh);
1842 if (fatal)
1843 goto out;
1844
1845 ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);
1846
1847 if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
1848 in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) ||
1849 in_range(ret_block, ext4_inode_table(sb, gdp),
1850 EXT4_SB(sb)->s_itb_per_group) ||
1851 in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
1852 EXT4_SB(sb)->s_itb_per_group)) {
1853 ext4_error(sb, "ext4_new_block",
1854 "Allocating block in system zone - "
1855 "blocks from %llu, length %lu",
1856 ret_block, num);
1857 /*
1858 * claim_block marked the blocks we allocated
1859 * as in use. So we may want to selectively
1860 * mark some of the blocks as free
1861 */
1862 goto retry_alloc;
1863 }
1864
1865 performed_allocation = 1;
1866
1867#ifdef CONFIG_JBD2_DEBUG
1868 {
1869 struct buffer_head *debug_bh;
1870
1871 /* Record bitmap buffer state in the newly allocated block */
1872 debug_bh = sb_find_get_block(sb, ret_block);
1873 if (debug_bh) {
1874 BUFFER_TRACE(debug_bh, "state when allocated");
1875 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
1876 brelse(debug_bh);
1877 }
1878 }
1879 jbd_lock_bh_state(bitmap_bh);
1880 spin_lock(sb_bgl_lock(sbi, group_no));
1881 if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
1882 int i;
1883
1884 for (i = 0; i < num; i++) {
1885 if (ext4_test_bit(grp_alloc_blk+i,
1886 bh2jh(bitmap_bh)->b_committed_data)) {
1887 printk("%s: block was unexpectedly set in "
1888 "b_committed_data\n", __func__);
1889 }
1890 }
1891 }
1892 ext4_debug("found bit %d\n", grp_alloc_blk);
1893 spin_unlock(sb_bgl_lock(sbi, group_no));
1894 jbd_unlock_bh_state(bitmap_bh);
1895#endif
1896
1897 if (ret_block + num - 1 >= ext4_blocks_count(es)) {
1898 ext4_error(sb, "ext4_new_block",
1899 "block(%llu) >= blocks count(%llu) - "
1900 "block_group = %lu, es == %p ", ret_block,
1901 ext4_blocks_count(es), group_no, es);
1902 goto out;
1903 }
1904
1905 /*
1906 * It is up to the caller to add the new buffer to a journal
1907 * list of some description. We don't know in advance whether
1908 * the caller wants to use it as metadata or data.
1909 */
1910 spin_lock(sb_bgl_lock(sbi, group_no));
1911 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1912 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
1913 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1914 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1915 spin_unlock(sb_bgl_lock(sbi, group_no));
1916 if (!EXT4_I(inode)->i_delalloc_reserved_flag)
1917 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
1918
1919 if (sbi->s_log_groups_per_flex) {
1920 ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
1921 spin_lock(sb_bgl_lock(sbi, flex_group));
1922 sbi->s_flex_groups[flex_group].free_blocks -= num;
1923 spin_unlock(sb_bgl_lock(sbi, flex_group));
1924 }
1925
1926 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
1927 err = ext4_journal_dirty_metadata(handle, gdp_bh);
1928 if (!fatal)
1929 fatal = err;
1930
1931 sb->s_dirt = 1;
1932 if (fatal)
1933 goto out;
1934
1935 *errp = 0;
1936 brelse(bitmap_bh);
1937 DQUOT_FREE_BLOCK(inode, *count-num);
1938 *count = num;
1939 return ret_block;
1940
1941io_error:
1942 *errp = -EIO;
1943out:
1944 if (fatal) {
1945 *errp = fatal;
1946 ext4_std_error(sb, fatal);
1947 }
1948 /*
1949 * Undo the block allocation
1950 */
1951 if (!performed_allocation)
1952 DQUOT_FREE_BLOCK(inode, *count);
1953 brelse(bitmap_bh);
1954 return 0;
1955}
1956
1957#define EXT4_META_BLOCK 0x1 684#define EXT4_META_BLOCK 0x1
1958 685
1959static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, 686static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
@@ -1963,10 +690,6 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
1963 struct ext4_allocation_request ar; 690 struct ext4_allocation_request ar;
1964 ext4_fsblk_t ret; 691 ext4_fsblk_t ret;
1965 692
1966 if (!test_opt(inode->i_sb, MBALLOC)) {
1967 return ext4_old_new_blocks(handle, inode, goal, count, errp);
1968 }
1969
1970 memset(&ar, 0, sizeof(ar)); 693 memset(&ar, 0, sizeof(ar));
1971 /* Fill with neighbour allocated blocks */ 694 /* Fill with neighbour allocated blocks */
1972 695
@@ -2008,7 +731,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
2008 /* 731 /*
2009 * Account for the allocated meta blocks 732 * Account for the allocated meta blocks
2010 */ 733 */
2011 if (!(*errp)) { 734 if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
2012 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 735 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
2013 EXT4_I(inode)->i_allocated_meta_blocks += *count; 736 EXT4_I(inode)->i_allocated_meta_blocks += *count;
2014 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 737 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2093,10 +816,9 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
2093 bitmap_count += x; 816 bitmap_count += x;
2094 } 817 }
2095 brelse(bitmap_bh); 818 brelse(bitmap_bh);
2096 printk("ext4_count_free_blocks: stored = %llu" 819 printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu"
2097 ", computed = %llu, %llu\n", 820 ", computed = %llu, %llu\n", ext4_free_blocks_count(es),
2098 ext4_free_blocks_count(es), 821 desc_count, bitmap_count);
2099 desc_count, bitmap_count);
2100 return bitmap_count; 822 return bitmap_count;
2101#else 823#else
2102 desc_count = 0; 824 desc_count = 0;
@@ -2183,8 +905,9 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
2183 905
2184 if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || 906 if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) ||
2185 metagroup < first_meta_bg) 907 metagroup < first_meta_bg)
2186 return ext4_bg_num_gdb_nometa(sb,group); 908 return ext4_bg_num_gdb_nometa(sb, group);
2187 909
2188 return ext4_bg_num_gdb_meta(sb,group); 910 return ext4_bg_num_gdb_meta(sb,group);
2189 911
2190} 912}
913
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index d37ea6750454..0a7a6663c190 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -15,17 +15,17 @@
15 15
16static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; 16static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
17 17
18unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) 18unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars)
19{ 19{
20 unsigned int i; 20 unsigned int i;
21 unsigned long sum = 0; 21 unsigned long sum = 0;
22 22
23 if (!map) 23 if (!map)
24 return (0); 24 return 0;
25 for (i = 0; i < numchars; i++) 25 for (i = 0; i < numchars; i++)
26 sum += nibblemap[map->b_data[i] & 0xf] + 26 sum += nibblemap[map->b_data[i] & 0xf] +
27 nibblemap[(map->b_data[i] >> 4) & 0xf]; 27 nibblemap[(map->b_data[i] >> 4) & 0xf];
28 return (sum); 28 return sum;
29} 29}
30 30
31#endif /* EXT4FS_DEBUG */ 31#endif /* EXT4FS_DEBUG */
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ec8e33b45219..3ca6a2b7632d 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table[] = {
33}; 33};
34 34
35static int ext4_readdir(struct file *, void *, filldir_t); 35static int ext4_readdir(struct file *, void *, filldir_t);
36static int ext4_dx_readdir(struct file * filp, 36static int ext4_dx_readdir(struct file *filp,
37 void * dirent, filldir_t filldir); 37 void *dirent, filldir_t filldir);
38static int ext4_release_dir (struct inode * inode, 38static int ext4_release_dir(struct inode *inode,
39 struct file * filp); 39 struct file *filp);
40 40
41const struct file_operations ext4_dir_operations = { 41const struct file_operations ext4_dir_operations = {
42 .llseek = generic_file_llseek, 42 .llseek = generic_file_llseek,
@@ -61,12 +61,12 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
61} 61}
62 62
63 63
64int ext4_check_dir_entry (const char * function, struct inode * dir, 64int ext4_check_dir_entry(const char *function, struct inode *dir,
65 struct ext4_dir_entry_2 * de, 65 struct ext4_dir_entry_2 *de,
66 struct buffer_head * bh, 66 struct buffer_head *bh,
67 unsigned long offset) 67 unsigned long offset)
68{ 68{
69 const char * error_msg = NULL; 69 const char *error_msg = NULL;
70 const int rlen = ext4_rec_len_from_disk(de->rec_len); 70 const int rlen = ext4_rec_len_from_disk(de->rec_len);
71 71
72 if (rlen < EXT4_DIR_REC_LEN(1)) 72 if (rlen < EXT4_DIR_REC_LEN(1))
@@ -82,7 +82,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
82 error_msg = "inode out of bounds"; 82 error_msg = "inode out of bounds";
83 83
84 if (error_msg != NULL) 84 if (error_msg != NULL)
85 ext4_error (dir->i_sb, function, 85 ext4_error(dir->i_sb, function,
86 "bad entry in directory #%lu: %s - " 86 "bad entry in directory #%lu: %s - "
87 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", 87 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
88 dir->i_ino, error_msg, offset, 88 dir->i_ino, error_msg, offset,
@@ -91,8 +91,8 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
91 return error_msg == NULL ? 1 : 0; 91 return error_msg == NULL ? 1 : 0;
92} 92}
93 93
94static int ext4_readdir(struct file * filp, 94static int ext4_readdir(struct file *filp,
95 void * dirent, filldir_t filldir) 95 void *dirent, filldir_t filldir)
96{ 96{
97 int error = 0; 97 int error = 0;
98 unsigned long offset; 98 unsigned long offset;
@@ -102,6 +102,7 @@ static int ext4_readdir(struct file * filp,
102 int err; 102 int err;
103 struct inode *inode = filp->f_path.dentry->d_inode; 103 struct inode *inode = filp->f_path.dentry->d_inode;
104 int ret = 0; 104 int ret = 0;
105 int dir_has_error = 0;
105 106
106 sb = inode->i_sb; 107 sb = inode->i_sb;
107 108
@@ -148,9 +149,13 @@ static int ext4_readdir(struct file * filp,
148 * of recovering data when there's a bad sector 149 * of recovering data when there's a bad sector
149 */ 150 */
150 if (!bh) { 151 if (!bh) {
151 ext4_error (sb, "ext4_readdir", 152 if (!dir_has_error) {
152 "directory #%lu contains a hole at offset %lu", 153 ext4_error(sb, __func__, "directory #%lu "
153 inode->i_ino, (unsigned long)filp->f_pos); 154 "contains a hole at offset %Lu",
155 inode->i_ino,
156 (unsigned long long) filp->f_pos);
157 dir_has_error = 1;
158 }
154 /* corrupt size? Maybe no more blocks to read */ 159 /* corrupt size? Maybe no more blocks to read */
155 if (filp->f_pos > inode->i_blocks << 9) 160 if (filp->f_pos > inode->i_blocks << 9)
156 break; 161 break;
@@ -187,14 +192,14 @@ revalidate:
187 while (!error && filp->f_pos < inode->i_size 192 while (!error && filp->f_pos < inode->i_size
188 && offset < sb->s_blocksize) { 193 && offset < sb->s_blocksize) {
189 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); 194 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
190 if (!ext4_check_dir_entry ("ext4_readdir", inode, de, 195 if (!ext4_check_dir_entry("ext4_readdir", inode, de,
191 bh, offset)) { 196 bh, offset)) {
192 /* 197 /*
193 * On error, skip the f_pos to the next block 198 * On error, skip the f_pos to the next block
194 */ 199 */
195 filp->f_pos = (filp->f_pos | 200 filp->f_pos = (filp->f_pos |
196 (sb->s_blocksize - 1)) + 1; 201 (sb->s_blocksize - 1)) + 1;
197 brelse (bh); 202 brelse(bh);
198 ret = stored; 203 ret = stored;
199 goto out; 204 goto out;
200 } 205 }
@@ -218,12 +223,12 @@ revalidate:
218 break; 223 break;
219 if (version != filp->f_version) 224 if (version != filp->f_version)
220 goto revalidate; 225 goto revalidate;
221 stored ++; 226 stored++;
222 } 227 }
223 filp->f_pos += ext4_rec_len_from_disk(de->rec_len); 228 filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
224 } 229 }
225 offset = 0; 230 offset = 0;
226 brelse (bh); 231 brelse(bh);
227 } 232 }
228out: 233out:
229 return ret; 234 return ret;
@@ -290,9 +295,9 @@ static void free_rb_tree_fname(struct rb_root *root)
290 parent = rb_parent(n); 295 parent = rb_parent(n);
291 fname = rb_entry(n, struct fname, rb_hash); 296 fname = rb_entry(n, struct fname, rb_hash);
292 while (fname) { 297 while (fname) {
293 struct fname * old = fname; 298 struct fname *old = fname;
294 fname = fname->next; 299 fname = fname->next;
295 kfree (old); 300 kfree(old);
296 } 301 }
297 if (!parent) 302 if (!parent)
298 root->rb_node = NULL; 303 root->rb_node = NULL;
@@ -331,7 +336,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
331 struct ext4_dir_entry_2 *dirent) 336 struct ext4_dir_entry_2 *dirent)
332{ 337{
333 struct rb_node **p, *parent = NULL; 338 struct rb_node **p, *parent = NULL;
334 struct fname * fname, *new_fn; 339 struct fname *fname, *new_fn;
335 struct dir_private_info *info; 340 struct dir_private_info *info;
336 int len; 341 int len;
337 342
@@ -388,19 +393,20 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
388 * for all entres on the fname linked list. (Normally there is only 393 * for all entres on the fname linked list. (Normally there is only
389 * one entry on the linked list, unless there are 62 bit hash collisions.) 394 * one entry on the linked list, unless there are 62 bit hash collisions.)
390 */ 395 */
391static int call_filldir(struct file * filp, void * dirent, 396static int call_filldir(struct file *filp, void *dirent,
392 filldir_t filldir, struct fname *fname) 397 filldir_t filldir, struct fname *fname)
393{ 398{
394 struct dir_private_info *info = filp->private_data; 399 struct dir_private_info *info = filp->private_data;
395 loff_t curr_pos; 400 loff_t curr_pos;
396 struct inode *inode = filp->f_path.dentry->d_inode; 401 struct inode *inode = filp->f_path.dentry->d_inode;
397 struct super_block * sb; 402 struct super_block *sb;
398 int error; 403 int error;
399 404
400 sb = inode->i_sb; 405 sb = inode->i_sb;
401 406
402 if (!fname) { 407 if (!fname) {
403 printk("call_filldir: called with null fname?!?\n"); 408 printk(KERN_ERR "ext4: call_filldir: called with "
409 "null fname?!?\n");
404 return 0; 410 return 0;
405 } 411 }
406 curr_pos = hash2pos(fname->hash, fname->minor_hash); 412 curr_pos = hash2pos(fname->hash, fname->minor_hash);
@@ -419,8 +425,8 @@ static int call_filldir(struct file * filp, void * dirent,
419 return 0; 425 return 0;
420} 426}
421 427
422static int ext4_dx_readdir(struct file * filp, 428static int ext4_dx_readdir(struct file *filp,
423 void * dirent, filldir_t filldir) 429 void *dirent, filldir_t filldir)
424{ 430{
425 struct dir_private_info *info = filp->private_data; 431 struct dir_private_info *info = filp->private_data;
426 struct inode *inode = filp->f_path.dentry->d_inode; 432 struct inode *inode = filp->f_path.dentry->d_inode;
@@ -511,7 +517,7 @@ finished:
511 return 0; 517 return 0;
512} 518}
513 519
514static int ext4_release_dir (struct inode * inode, struct file * filp) 520static int ext4_release_dir(struct inode *inode, struct file *filp)
515{ 521{
516 if (filp->private_data) 522 if (filp->private_data)
517 ext4_htree_free_dir_info(filp->private_data); 523 ext4_htree_free_dir_info(filp->private_data);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 295003241d3d..6690a41cdd9f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -44,9 +44,9 @@
44#ifdef EXT4FS_DEBUG 44#ifdef EXT4FS_DEBUG
45#define ext4_debug(f, a...) \ 45#define ext4_debug(f, a...) \
46 do { \ 46 do { \
47 printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ 47 printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
48 __FILE__, __LINE__, __func__); \ 48 __FILE__, __LINE__, __func__); \
49 printk (KERN_DEBUG f, ## a); \ 49 printk(KERN_DEBUG f, ## a); \
50 } while (0) 50 } while (0)
51#else 51#else
52#define ext4_debug(f, a...) do {} while (0) 52#define ext4_debug(f, a...) do {} while (0)
@@ -128,7 +128,7 @@ struct ext4_allocation_request {
128#else 128#else
129# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) 129# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
130#endif 130#endif
131#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) 131#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
132#ifdef __KERNEL__ 132#ifdef __KERNEL__
133# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) 133# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
134#else 134#else
@@ -245,7 +245,7 @@ struct flex_groups {
245#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ 245#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
246 246
247#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ 247#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
248#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ 248#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */
249 249
250/* 250/*
251 * Inode dynamic state flags 251 * Inode dynamic state flags
@@ -291,8 +291,6 @@ struct ext4_new_group_data {
291#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS 291#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
292#define EXT4_IOC_GETVERSION _IOR('f', 3, long) 292#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
293#define EXT4_IOC_SETVERSION _IOW('f', 4, long) 293#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
294#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
295#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
296#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION 294#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
297#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION 295#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
298#ifdef CONFIG_JBD2_DEBUG 296#ifdef CONFIG_JBD2_DEBUG
@@ -300,7 +298,10 @@ struct ext4_new_group_data {
300#endif 298#endif
301#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) 299#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
302#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) 300#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
303#define EXT4_IOC_MIGRATE _IO('f', 7) 301#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
302#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
303#define EXT4_IOC_MIGRATE _IO('f', 9)
304 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
304 305
305/* 306/*
306 * ioctl commands in 32 bit emulation 307 * ioctl commands in 32 bit emulation
@@ -538,8 +539,9 @@ do { \
538#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 539#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
539#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 540#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
540#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 541#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
541#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
542#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 542#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
543#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
544
543/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ 545/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
544#ifndef _LINUX_EXT2_FS_H 546#ifndef _LINUX_EXT2_FS_H
545#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 547#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
@@ -667,7 +669,7 @@ struct ext4_super_block {
667}; 669};
668 670
669#ifdef __KERNEL__ 671#ifdef __KERNEL__
670static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) 672static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
671{ 673{
672 return sb->s_fs_info; 674 return sb->s_fs_info;
673} 675}
@@ -725,11 +727,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
725 */ 727 */
726 728
727#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ 729#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
728 ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) 730 (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask))
729#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ 731#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
730 ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) 732 (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask))
731#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ 733#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
732 ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) 734 (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask))
733#define EXT4_SET_COMPAT_FEATURE(sb,mask) \ 735#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
734 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) 736 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
735#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ 737#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
@@ -789,6 +791,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
789#define EXT4_DEF_RESUID 0 791#define EXT4_DEF_RESUID 0
790#define EXT4_DEF_RESGID 0 792#define EXT4_DEF_RESGID 0
791 793
794#define EXT4_DEF_INODE_READAHEAD_BLKS 32
795
792/* 796/*
793 * Default mount options 797 * Default mount options
794 */ 798 */
@@ -954,6 +958,24 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
954void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 958void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
955 unsigned long *blockgrpp, ext4_grpblk_t *offsetp); 959 unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
956 960
961extern struct proc_dir_entry *ext4_proc_root;
962
963#ifdef CONFIG_PROC_FS
964extern const struct file_operations ext4_ui_proc_fops;
965
966#define EXT4_PROC_HANDLER(name, var) \
967do { \
968 proc = proc_create_data(name, mode, sbi->s_proc, \
969 &ext4_ui_proc_fops, &sbi->s_##var); \
970 if (proc == NULL) { \
971 printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \
972 goto err_out; \
973 } \
974} while (0)
975#else
976#define EXT4_PROC_HANDLER(name, var)
977#endif
978
957/* 979/*
958 * Function prototypes 980 * Function prototypes
959 */ 981 */
@@ -981,23 +1003,20 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
981extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, 1003extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
982 ext4_lblk_t iblock, ext4_fsblk_t goal, 1004 ext4_lblk_t iblock, ext4_fsblk_t goal,
983 unsigned long *count, int *errp); 1005 unsigned long *count, int *errp);
984extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, 1006extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
985 ext4_fsblk_t goal, unsigned long *count, int *errp);
986extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, 1007extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
987 ext4_fsblk_t nblocks); 1008 s64 nblocks);
988extern void ext4_free_blocks (handle_t *handle, struct inode *inode, 1009extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
989 ext4_fsblk_t block, unsigned long count, int metadata); 1010 ext4_fsblk_t block, unsigned long count, int metadata);
990extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, 1011extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
991 ext4_fsblk_t block, unsigned long count, 1012 ext4_fsblk_t block, unsigned long count,
992 unsigned long *pdquot_freed_blocks); 1013 unsigned long *pdquot_freed_blocks);
993extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); 1014extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
994extern void ext4_check_blocks_bitmap (struct super_block *); 1015extern void ext4_check_blocks_bitmap(struct super_block *);
995extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 1016extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
996 ext4_group_t block_group, 1017 ext4_group_t block_group,
997 struct buffer_head ** bh); 1018 struct buffer_head ** bh);
998extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1019extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
999extern void ext4_init_block_alloc_info(struct inode *);
1000extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
1001 1020
1002/* dir.c */ 1021/* dir.c */
1003extern int ext4_check_dir_entry(const char *, struct inode *, 1022extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1009,20 +1028,20 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
1009extern void ext4_htree_free_dir_info(struct dir_private_info *p); 1028extern void ext4_htree_free_dir_info(struct dir_private_info *p);
1010 1029
1011/* fsync.c */ 1030/* fsync.c */
1012extern int ext4_sync_file (struct file *, struct dentry *, int); 1031extern int ext4_sync_file(struct file *, struct dentry *, int);
1013 1032
1014/* hash.c */ 1033/* hash.c */
1015extern int ext4fs_dirhash(const char *name, int len, struct 1034extern int ext4fs_dirhash(const char *name, int len, struct
1016 dx_hash_info *hinfo); 1035 dx_hash_info *hinfo);
1017 1036
1018/* ialloc.c */ 1037/* ialloc.c */
1019extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); 1038extern struct inode * ext4_new_inode(handle_t *, struct inode *, int);
1020extern void ext4_free_inode (handle_t *, struct inode *); 1039extern void ext4_free_inode(handle_t *, struct inode *);
1021extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); 1040extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1022extern unsigned long ext4_count_free_inodes (struct super_block *); 1041extern unsigned long ext4_count_free_inodes(struct super_block *);
1023extern unsigned long ext4_count_dirs (struct super_block *); 1042extern unsigned long ext4_count_dirs(struct super_block *);
1024extern void ext4_check_inodes_bitmap (struct super_block *); 1043extern void ext4_check_inodes_bitmap(struct super_block *);
1025extern unsigned long ext4_count_free (struct buffer_head *, unsigned); 1044extern unsigned long ext4_count_free(struct buffer_head *, unsigned);
1026 1045
1027/* mballoc.c */ 1046/* mballoc.c */
1028extern long ext4_mb_stats; 1047extern long ext4_mb_stats;
@@ -1032,7 +1051,7 @@ extern int ext4_mb_release(struct super_block *);
1032extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, 1051extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
1033 struct ext4_allocation_request *, int *); 1052 struct ext4_allocation_request *, int *);
1034extern int ext4_mb_reserve_blocks(struct super_block *, int); 1053extern int ext4_mb_reserve_blocks(struct super_block *, int);
1035extern void ext4_mb_discard_inode_preallocations(struct inode *); 1054extern void ext4_discard_preallocations(struct inode *);
1036extern int __init init_ext4_mballoc(void); 1055extern int __init init_ext4_mballoc(void);
1037extern void exit_ext4_mballoc(void); 1056extern void exit_ext4_mballoc(void);
1038extern void ext4_mb_free_blocks(handle_t *, struct inode *, 1057extern void ext4_mb_free_blocks(handle_t *, struct inode *,
@@ -1050,24 +1069,25 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1050 ext4_lblk_t, int, int *); 1069 ext4_lblk_t, int, int *);
1051struct buffer_head *ext4_bread(handle_t *, struct inode *, 1070struct buffer_head *ext4_bread(handle_t *, struct inode *,
1052 ext4_lblk_t, int, int *); 1071 ext4_lblk_t, int, int *);
1072int ext4_get_block(struct inode *inode, sector_t iblock,
1073 struct buffer_head *bh_result, int create);
1053int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, 1074int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
1054 ext4_lblk_t iblock, unsigned long maxblocks, 1075 ext4_lblk_t iblock, unsigned long maxblocks,
1055 struct buffer_head *bh_result, 1076 struct buffer_head *bh_result,
1056 int create, int extend_disksize); 1077 int create, int extend_disksize);
1057 1078
1058extern struct inode *ext4_iget(struct super_block *, unsigned long); 1079extern struct inode *ext4_iget(struct super_block *, unsigned long);
1059extern int ext4_write_inode (struct inode *, int); 1080extern int ext4_write_inode(struct inode *, int);
1060extern int ext4_setattr (struct dentry *, struct iattr *); 1081extern int ext4_setattr(struct dentry *, struct iattr *);
1061extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, 1082extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
1062 struct kstat *stat); 1083 struct kstat *stat);
1063extern void ext4_delete_inode (struct inode *); 1084extern void ext4_delete_inode(struct inode *);
1064extern int ext4_sync_inode (handle_t *, struct inode *); 1085extern int ext4_sync_inode(handle_t *, struct inode *);
1065extern void ext4_discard_reservation (struct inode *);
1066extern void ext4_dirty_inode(struct inode *); 1086extern void ext4_dirty_inode(struct inode *);
1067extern int ext4_change_inode_journal_flag(struct inode *, int); 1087extern int ext4_change_inode_journal_flag(struct inode *, int);
1068extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); 1088extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
1069extern int ext4_can_truncate(struct inode *inode); 1089extern int ext4_can_truncate(struct inode *inode);
1070extern void ext4_truncate (struct inode *); 1090extern void ext4_truncate(struct inode *);
1071extern void ext4_set_inode_flags(struct inode *); 1091extern void ext4_set_inode_flags(struct inode *);
1072extern void ext4_get_inode_flags(struct ext4_inode_info *); 1092extern void ext4_get_inode_flags(struct ext4_inode_info *);
1073extern void ext4_set_aops(struct inode *inode); 1093extern void ext4_set_aops(struct inode *inode);
@@ -1080,11 +1100,10 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
1080 1100
1081/* ioctl.c */ 1101/* ioctl.c */
1082extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1102extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1083extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); 1103extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
1084 1104
1085/* migrate.c */ 1105/* migrate.c */
1086extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, 1106extern int ext4_ext_migrate(struct inode *);
1087 unsigned long);
1088/* namei.c */ 1107/* namei.c */
1089extern int ext4_orphan_add(handle_t *, struct inode *); 1108extern int ext4_orphan_add(handle_t *, struct inode *);
1090extern int ext4_orphan_del(handle_t *, struct inode *); 1109extern int ext4_orphan_del(handle_t *, struct inode *);
@@ -1099,14 +1118,14 @@ extern int ext4_group_extend(struct super_block *sb,
1099 ext4_fsblk_t n_blocks_count); 1118 ext4_fsblk_t n_blocks_count);
1100 1119
1101/* super.c */ 1120/* super.c */
1102extern void ext4_error (struct super_block *, const char *, const char *, ...) 1121extern void ext4_error(struct super_block *, const char *, const char *, ...)
1103 __attribute__ ((format (printf, 3, 4))); 1122 __attribute__ ((format (printf, 3, 4)));
1104extern void __ext4_std_error (struct super_block *, const char *, int); 1123extern void __ext4_std_error(struct super_block *, const char *, int);
1105extern void ext4_abort (struct super_block *, const char *, const char *, ...) 1124extern void ext4_abort(struct super_block *, const char *, const char *, ...)
1106 __attribute__ ((format (printf, 3, 4))); 1125 __attribute__ ((format (printf, 3, 4)));
1107extern void ext4_warning (struct super_block *, const char *, const char *, ...) 1126extern void ext4_warning(struct super_block *, const char *, const char *, ...)
1108 __attribute__ ((format (printf, 3, 4))); 1127 __attribute__ ((format (printf, 3, 4)));
1109extern void ext4_update_dynamic_rev (struct super_block *sb); 1128extern void ext4_update_dynamic_rev(struct super_block *sb);
1110extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, 1129extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
1111 __u32 compat); 1130 __u32 compat);
1112extern int ext4_update_rocompat_feature(handle_t *handle, 1131extern int ext4_update_rocompat_feature(handle_t *handle,
@@ -1179,7 +1198,7 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
1179 1198
1180static inline 1199static inline
1181struct ext4_group_info *ext4_get_group_info(struct super_block *sb, 1200struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
1182 ext4_group_t group) 1201 ext4_group_t group)
1183{ 1202{
1184 struct ext4_group_info ***grp_info; 1203 struct ext4_group_info ***grp_info;
1185 long indexv, indexh; 1204 long indexv, indexh;
@@ -1207,6 +1226,28 @@ do { \
1207 __ext4_std_error((sb), __func__, (errno)); \ 1226 __ext4_std_error((sb), __func__, (errno)); \
1208} while (0) 1227} while (0)
1209 1228
1229#ifdef CONFIG_SMP
1230/* Each CPU can accumulate FBC_BATCH blocks in their local
1231 * counters. So we need to make sure we have free blocks more
1232 * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
1233 */
1234#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
1235#else
1236#define EXT4_FREEBLOCKS_WATERMARK 0
1237#endif
1238
1239static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
1240{
1241 /*
1242 * XXX: replace with spinlock if seen contended -bzzz
1243 */
1244 down_write(&EXT4_I(inode)->i_data_sem);
1245 if (newsize > EXT4_I(inode)->i_disksize)
1246 EXT4_I(inode)->i_disksize = newsize;
1247 up_write(&EXT4_I(inode)->i_data_sem);
1248 return ;
1249}
1250
1210/* 1251/*
1211 * Inodes and files operations 1252 * Inodes and files operations
1212 */ 1253 */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index d33dc56d6986..bec7ce59fc0d 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -124,6 +124,19 @@ struct ext4_ext_path {
124#define EXT4_EXT_CACHE_GAP 1 124#define EXT4_EXT_CACHE_GAP 1
125#define EXT4_EXT_CACHE_EXTENT 2 125#define EXT4_EXT_CACHE_EXTENT 2
126 126
127/*
128 * to be called by ext4_ext_walk_space()
129 * negative retcode - error
130 * positive retcode - signal for ext4_ext_walk_space(), see below
131 * callback must return valid extent (passed or newly created)
132 */
133typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
134 struct ext4_ext_cache *,
135 struct ext4_extent *, void *);
136
137#define EXT_CONTINUE 0
138#define EXT_BREAK 1
139#define EXT_REPEAT 2
127 140
128#define EXT_MAX_BLOCK 0xffffffff 141#define EXT_MAX_BLOCK 0xffffffff
129 142
@@ -224,6 +237,8 @@ extern int ext4_ext_try_to_merge(struct inode *inode,
224 struct ext4_extent *); 237 struct ext4_extent *);
225extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); 238extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
226extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); 239extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
240extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
241 ext_prepare_callback, void *);
227extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, 242extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
228 struct ext4_ext_path *); 243 struct ext4_ext_path *);
229extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, 244extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index ef7409f0e7e4..5c124c0ac6d3 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -33,38 +33,6 @@ typedef __u32 ext4_lblk_t;
33/* data type for block group number */ 33/* data type for block group number */
34typedef unsigned long ext4_group_t; 34typedef unsigned long ext4_group_t;
35 35
36struct ext4_reserve_window {
37 ext4_fsblk_t _rsv_start; /* First byte reserved */
38 ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
39};
40
41struct ext4_reserve_window_node {
42 struct rb_node rsv_node;
43 __u32 rsv_goal_size;
44 __u32 rsv_alloc_hit;
45 struct ext4_reserve_window rsv_window;
46};
47
48struct ext4_block_alloc_info {
49 /* information about reservation window */
50 struct ext4_reserve_window_node rsv_window_node;
51 /*
52 * was i_next_alloc_block in ext4_inode_info
53 * is the logical (file-relative) number of the
54 * most-recently-allocated block in this file.
55 * We use this for detecting linearly ascending allocation requests.
56 */
57 ext4_lblk_t last_alloc_logical_block;
58 /*
59 * Was i_next_alloc_goal in ext4_inode_info
60 * is the *physical* companion to i_next_alloc_block.
61 * it the physical block number of the block which was most-recentl
62 * allocated to this file. This give us the goal (target) for the next
63 * allocation when we detect linearly ascending requests.
64 */
65 ext4_fsblk_t last_alloc_physical_block;
66};
67
68#define rsv_start rsv_window._rsv_start 36#define rsv_start rsv_window._rsv_start
69#define rsv_end rsv_window._rsv_end 37#define rsv_end rsv_window._rsv_end
70 38
@@ -97,11 +65,8 @@ struct ext4_inode_info {
97 ext4_group_t i_block_group; 65 ext4_group_t i_block_group;
98 __u32 i_state; /* Dynamic state flags for ext4 */ 66 __u32 i_state; /* Dynamic state flags for ext4 */
99 67
100 /* block reservation info */
101 struct ext4_block_alloc_info *i_block_alloc_info;
102
103 ext4_lblk_t i_dir_start_lookup; 68 ext4_lblk_t i_dir_start_lookup;
104#ifdef CONFIG_EXT4DEV_FS_XATTR 69#ifdef CONFIG_EXT4_FS_XATTR
105 /* 70 /*
106 * Extended attributes can be read independently of the main file 71 * Extended attributes can be read independently of the main file
107 * data. Taking i_mutex even when reading would cause contention 72 * data. Taking i_mutex even when reading would cause contention
@@ -111,7 +76,7 @@ struct ext4_inode_info {
111 */ 76 */
112 struct rw_semaphore xattr_sem; 77 struct rw_semaphore xattr_sem;
113#endif 78#endif
114#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 79#ifdef CONFIG_EXT4_FS_POSIX_ACL
115 struct posix_acl *i_acl; 80 struct posix_acl *i_acl;
116 struct posix_acl *i_default_acl; 81 struct posix_acl *i_default_acl;
117#endif 82#endif
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 6300226d5531..6a0b40d43264 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -40,8 +40,8 @@ struct ext4_sb_info {
40 unsigned long s_blocks_last; /* Last seen block count */ 40 unsigned long s_blocks_last; /* Last seen block count */
41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ 41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
42 struct buffer_head * s_sbh; /* Buffer containing the super block */ 42 struct buffer_head * s_sbh; /* Buffer containing the super block */
43 struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ 43 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
44 struct buffer_head ** s_group_desc; 44 struct buffer_head **s_group_desc;
45 unsigned long s_mount_opt; 45 unsigned long s_mount_opt;
46 ext4_fsblk_t s_sb_block; 46 ext4_fsblk_t s_sb_block;
47 uid_t s_resuid; 47 uid_t s_resuid;
@@ -52,6 +52,7 @@ struct ext4_sb_info {
52 int s_desc_per_block_bits; 52 int s_desc_per_block_bits;
53 int s_inode_size; 53 int s_inode_size;
54 int s_first_ino; 54 int s_first_ino;
55 unsigned int s_inode_readahead_blks;
55 spinlock_t s_next_gen_lock; 56 spinlock_t s_next_gen_lock;
56 u32 s_next_generation; 57 u32 s_next_generation;
57 u32 s_hash_seed[4]; 58 u32 s_hash_seed[4];
@@ -59,16 +60,17 @@ struct ext4_sb_info {
59 struct percpu_counter s_freeblocks_counter; 60 struct percpu_counter s_freeblocks_counter;
60 struct percpu_counter s_freeinodes_counter; 61 struct percpu_counter s_freeinodes_counter;
61 struct percpu_counter s_dirs_counter; 62 struct percpu_counter s_dirs_counter;
63 struct percpu_counter s_dirtyblocks_counter;
62 struct blockgroup_lock s_blockgroup_lock; 64 struct blockgroup_lock s_blockgroup_lock;
65 struct proc_dir_entry *s_proc;
63 66
64 /* root of the per fs reservation window tree */ 67 /* root of the per fs reservation window tree */
65 spinlock_t s_rsv_window_lock; 68 spinlock_t s_rsv_window_lock;
66 struct rb_root s_rsv_window_root; 69 struct rb_root s_rsv_window_root;
67 struct ext4_reserve_window_node s_rsv_window_head;
68 70
69 /* Journaling */ 71 /* Journaling */
70 struct inode * s_journal_inode; 72 struct inode *s_journal_inode;
71 struct journal_s * s_journal; 73 struct journal_s *s_journal;
72 struct list_head s_orphan; 74 struct list_head s_orphan;
73 unsigned long s_commit_interval; 75 unsigned long s_commit_interval;
74 struct block_device *journal_bdev; 76 struct block_device *journal_bdev;
@@ -106,12 +108,12 @@ struct ext4_sb_info {
106 108
107 /* tunables */ 109 /* tunables */
108 unsigned long s_stripe; 110 unsigned long s_stripe;
109 unsigned long s_mb_stream_request; 111 unsigned int s_mb_stream_request;
110 unsigned long s_mb_max_to_scan; 112 unsigned int s_mb_max_to_scan;
111 unsigned long s_mb_min_to_scan; 113 unsigned int s_mb_min_to_scan;
112 unsigned long s_mb_stats; 114 unsigned int s_mb_stats;
113 unsigned long s_mb_order2_reqs; 115 unsigned int s_mb_order2_reqs;
114 unsigned long s_mb_group_prealloc; 116 unsigned int s_mb_group_prealloc;
115 /* where last allocation was done - for stream allocation */ 117 /* where last allocation was done - for stream allocation */
116 unsigned long s_mb_last_group; 118 unsigned long s_mb_last_group;
117 unsigned long s_mb_last_start; 119 unsigned long s_mb_last_start;
@@ -121,7 +123,6 @@ struct ext4_sb_info {
121 int s_mb_history_cur; 123 int s_mb_history_cur;
122 int s_mb_history_max; 124 int s_mb_history_max;
123 int s_mb_history_num; 125 int s_mb_history_num;
124 struct proc_dir_entry *s_mb_proc;
125 spinlock_t s_mb_history_lock; 126 spinlock_t s_mb_history_lock;
126 int s_mb_history_filter; 127 int s_mb_history_filter;
127 128
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b24d3c53f20c..ea2ce3c0ae66 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -40,6 +40,7 @@
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/falloc.h> 41#include <linux/falloc.h>
42#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include <linux/fiemap.h>
43#include "ext4_jbd2.h" 44#include "ext4_jbd2.h"
44#include "ext4_extents.h" 45#include "ext4_extents.h"
45 46
@@ -383,8 +384,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
383 ext_debug("\n"); 384 ext_debug("\n");
384} 385}
385#else 386#else
386#define ext4_ext_show_path(inode,path) 387#define ext4_ext_show_path(inode, path)
387#define ext4_ext_show_leaf(inode,path) 388#define ext4_ext_show_leaf(inode, path)
388#endif 389#endif
389 390
390void ext4_ext_drop_refs(struct ext4_ext_path *path) 391void ext4_ext_drop_refs(struct ext4_ext_path *path)
@@ -440,9 +441,10 @@ ext4_ext_binsearch_idx(struct inode *inode,
440 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { 441 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
441 if (k != 0 && 442 if (k != 0 &&
442 le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { 443 le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
443 printk("k=%d, ix=0x%p, first=0x%p\n", k, 444 printk(KERN_DEBUG "k=%d, ix=0x%p, "
444 ix, EXT_FIRST_INDEX(eh)); 445 "first=0x%p\n", k,
445 printk("%u <= %u\n", 446 ix, EXT_FIRST_INDEX(eh));
447 printk(KERN_DEBUG "%u <= %u\n",
446 le32_to_cpu(ix->ei_block), 448 le32_to_cpu(ix->ei_block),
447 le32_to_cpu(ix[-1].ei_block)); 449 le32_to_cpu(ix[-1].ei_block));
448 } 450 }
@@ -1475,7 +1477,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1475 struct ext4_ext_path *path, 1477 struct ext4_ext_path *path,
1476 struct ext4_extent *newext) 1478 struct ext4_extent *newext)
1477{ 1479{
1478 struct ext4_extent_header * eh; 1480 struct ext4_extent_header *eh;
1479 struct ext4_extent *ex, *fex; 1481 struct ext4_extent *ex, *fex;
1480 struct ext4_extent *nearex; /* nearest extent */ 1482 struct ext4_extent *nearex; /* nearest extent */
1481 struct ext4_ext_path *npath = NULL; 1483 struct ext4_ext_path *npath = NULL;
@@ -1625,6 +1627,113 @@ cleanup:
1625 return err; 1627 return err;
1626} 1628}
1627 1629
1630int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1631 ext4_lblk_t num, ext_prepare_callback func,
1632 void *cbdata)
1633{
1634 struct ext4_ext_path *path = NULL;
1635 struct ext4_ext_cache cbex;
1636 struct ext4_extent *ex;
1637 ext4_lblk_t next, start = 0, end = 0;
1638 ext4_lblk_t last = block + num;
1639 int depth, exists, err = 0;
1640
1641 BUG_ON(func == NULL);
1642 BUG_ON(inode == NULL);
1643
1644 while (block < last && block != EXT_MAX_BLOCK) {
1645 num = last - block;
1646 /* find extent for this block */
1647 path = ext4_ext_find_extent(inode, block, path);
1648 if (IS_ERR(path)) {
1649 err = PTR_ERR(path);
1650 path = NULL;
1651 break;
1652 }
1653
1654 depth = ext_depth(inode);
1655 BUG_ON(path[depth].p_hdr == NULL);
1656 ex = path[depth].p_ext;
1657 next = ext4_ext_next_allocated_block(path);
1658
1659 exists = 0;
1660 if (!ex) {
1661 /* there is no extent yet, so try to allocate
1662 * all requested space */
1663 start = block;
1664 end = block + num;
1665 } else if (le32_to_cpu(ex->ee_block) > block) {
1666 /* need to allocate space before found extent */
1667 start = block;
1668 end = le32_to_cpu(ex->ee_block);
1669 if (block + num < end)
1670 end = block + num;
1671 } else if (block >= le32_to_cpu(ex->ee_block)
1672 + ext4_ext_get_actual_len(ex)) {
1673 /* need to allocate space after found extent */
1674 start = block;
1675 end = block + num;
1676 if (end >= next)
1677 end = next;
1678 } else if (block >= le32_to_cpu(ex->ee_block)) {
1679 /*
1680 * some part of requested space is covered
1681 * by found extent
1682 */
1683 start = block;
1684 end = le32_to_cpu(ex->ee_block)
1685 + ext4_ext_get_actual_len(ex);
1686 if (block + num < end)
1687 end = block + num;
1688 exists = 1;
1689 } else {
1690 BUG();
1691 }
1692 BUG_ON(end <= start);
1693
1694 if (!exists) {
1695 cbex.ec_block = start;
1696 cbex.ec_len = end - start;
1697 cbex.ec_start = 0;
1698 cbex.ec_type = EXT4_EXT_CACHE_GAP;
1699 } else {
1700 cbex.ec_block = le32_to_cpu(ex->ee_block);
1701 cbex.ec_len = ext4_ext_get_actual_len(ex);
1702 cbex.ec_start = ext_pblock(ex);
1703 cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1704 }
1705
1706 BUG_ON(cbex.ec_len == 0);
1707 err = func(inode, path, &cbex, ex, cbdata);
1708 ext4_ext_drop_refs(path);
1709
1710 if (err < 0)
1711 break;
1712
1713 if (err == EXT_REPEAT)
1714 continue;
1715 else if (err == EXT_BREAK) {
1716 err = 0;
1717 break;
1718 }
1719
1720 if (ext_depth(inode) != depth) {
1721 /* depth was changed. we have to realloc path */
1722 kfree(path);
1723 path = NULL;
1724 }
1725
1726 block = cbex.ec_block + cbex.ec_len;
1727 }
1728
1729 if (path) {
1730 ext4_ext_drop_refs(path);
1731 kfree(path);
1732 }
1733
1734 return err;
1735}
1736
1628static void 1737static void
1629ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, 1738ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
1630 __u32 len, ext4_fsblk_t start, int type) 1739 __u32 len, ext4_fsblk_t start, int type)
@@ -2142,7 +2251,7 @@ void ext4_ext_init(struct super_block *sb)
2142 */ 2251 */
2143 2252
2144 if (test_opt(sb, EXTENTS)) { 2253 if (test_opt(sb, EXTENTS)) {
2145 printk("EXT4-fs: file extents enabled"); 2254 printk(KERN_INFO "EXT4-fs: file extents enabled");
2146#ifdef AGGRESSIVE_TEST 2255#ifdef AGGRESSIVE_TEST
2147 printk(", aggressive tests"); 2256 printk(", aggressive tests");
2148#endif 2257#endif
@@ -2696,11 +2805,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2696 goto out2; 2805 goto out2;
2697 } 2806 }
2698 /* 2807 /*
2699 * Okay, we need to do block allocation. Lazily initialize the block 2808 * Okay, we need to do block allocation.
2700 * allocation info here if necessary.
2701 */ 2809 */
2702 if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
2703 ext4_init_block_alloc_info(inode);
2704 2810
2705 /* find neighbour allocated blocks */ 2811 /* find neighbour allocated blocks */
2706 ar.lleft = iblock; 2812 ar.lleft = iblock;
@@ -2760,7 +2866,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2760 /* free data blocks we just allocated */ 2866 /* free data blocks we just allocated */
2761 /* not a good idea to call discard here directly, 2867 /* not a good idea to call discard here directly,
2762 * but otherwise we'd need to call it every free() */ 2868 * but otherwise we'd need to call it every free() */
2763 ext4_mb_discard_inode_preallocations(inode); 2869 ext4_discard_preallocations(inode);
2764 ext4_free_blocks(handle, inode, ext_pblock(&newex), 2870 ext4_free_blocks(handle, inode, ext_pblock(&newex),
2765 ext4_ext_get_actual_len(&newex), 0); 2871 ext4_ext_get_actual_len(&newex), 0);
2766 goto out2; 2872 goto out2;
@@ -2824,7 +2930,7 @@ void ext4_ext_truncate(struct inode *inode)
2824 down_write(&EXT4_I(inode)->i_data_sem); 2930 down_write(&EXT4_I(inode)->i_data_sem);
2825 ext4_ext_invalidate_cache(inode); 2931 ext4_ext_invalidate_cache(inode);
2826 2932
2827 ext4_discard_reservation(inode); 2933 ext4_discard_preallocations(inode);
2828 2934
2829 /* 2935 /*
2830 * TODO: optimization is possible here. 2936 * TODO: optimization is possible here.
@@ -2877,10 +2983,11 @@ static void ext4_falloc_update_inode(struct inode *inode,
2877 * Update only when preallocation was requested beyond 2983 * Update only when preallocation was requested beyond
2878 * the file size. 2984 * the file size.
2879 */ 2985 */
2880 if (!(mode & FALLOC_FL_KEEP_SIZE) && 2986 if (!(mode & FALLOC_FL_KEEP_SIZE)) {
2881 new_size > i_size_read(inode)) { 2987 if (new_size > i_size_read(inode))
2882 i_size_write(inode, new_size); 2988 i_size_write(inode, new_size);
2883 EXT4_I(inode)->i_disksize = new_size; 2989 if (new_size > EXT4_I(inode)->i_disksize)
2990 ext4_update_i_disksize(inode, new_size);
2884 } 2991 }
2885 2992
2886} 2993}
@@ -2972,3 +3079,143 @@ retry:
2972 mutex_unlock(&inode->i_mutex); 3079 mutex_unlock(&inode->i_mutex);
2973 return ret > 0 ? ret2 : ret; 3080 return ret > 0 ? ret2 : ret;
2974} 3081}
3082
3083/*
3084 * Callback function called for each extent to gather FIEMAP information.
3085 */
3086int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3087 struct ext4_ext_cache *newex, struct ext4_extent *ex,
3088 void *data)
3089{
3090 struct fiemap_extent_info *fieinfo = data;
3091 unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
3092 __u64 logical;
3093 __u64 physical;
3094 __u64 length;
3095 __u32 flags = 0;
3096 int error;
3097
3098 logical = (__u64)newex->ec_block << blksize_bits;
3099
3100 if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
3101 pgoff_t offset;
3102 struct page *page;
3103 struct buffer_head *bh = NULL;
3104
3105 offset = logical >> PAGE_SHIFT;
3106 page = find_get_page(inode->i_mapping, offset);
3107 if (!page || !page_has_buffers(page))
3108 return EXT_CONTINUE;
3109
3110 bh = page_buffers(page);
3111
3112 if (!bh)
3113 return EXT_CONTINUE;
3114
3115 if (buffer_delay(bh)) {
3116 flags |= FIEMAP_EXTENT_DELALLOC;
3117 page_cache_release(page);
3118 } else {
3119 page_cache_release(page);
3120 return EXT_CONTINUE;
3121 }
3122 }
3123
3124 physical = (__u64)newex->ec_start << blksize_bits;
3125 length = (__u64)newex->ec_len << blksize_bits;
3126
3127 if (ex && ext4_ext_is_uninitialized(ex))
3128 flags |= FIEMAP_EXTENT_UNWRITTEN;
3129
3130 /*
3131 * If this extent reaches EXT_MAX_BLOCK, it must be last.
3132 *
3133 * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
3134 * this also indicates no more allocated blocks.
3135 *
3136 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
3137 */
3138 if (logical + length - 1 == EXT_MAX_BLOCK ||
3139 ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
3140 flags |= FIEMAP_EXTENT_LAST;
3141
3142 error = fiemap_fill_next_extent(fieinfo, logical, physical,
3143 length, flags);
3144 if (error < 0)
3145 return error;
3146 if (error == 1)
3147 return EXT_BREAK;
3148
3149 return EXT_CONTINUE;
3150}
3151
3152/* fiemap flags we can handle specified here */
3153#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
3154
3155int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
3156{
3157 __u64 physical = 0;
3158 __u64 length;
3159 __u32 flags = FIEMAP_EXTENT_LAST;
3160 int blockbits = inode->i_sb->s_blocksize_bits;
3161 int error = 0;
3162
3163 /* in-inode? */
3164 if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
3165 struct ext4_iloc iloc;
3166 int offset; /* offset of xattr in inode */
3167
3168 error = ext4_get_inode_loc(inode, &iloc);
3169 if (error)
3170 return error;
3171 physical = iloc.bh->b_blocknr << blockbits;
3172 offset = EXT4_GOOD_OLD_INODE_SIZE +
3173 EXT4_I(inode)->i_extra_isize;
3174 physical += offset;
3175 length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
3176 flags |= FIEMAP_EXTENT_DATA_INLINE;
3177 } else { /* external block */
3178 physical = EXT4_I(inode)->i_file_acl << blockbits;
3179 length = inode->i_sb->s_blocksize;
3180 }
3181
3182 if (physical)
3183 error = fiemap_fill_next_extent(fieinfo, 0, physical,
3184 length, flags);
3185 return (error < 0 ? error : 0);
3186}
3187
3188int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3189 __u64 start, __u64 len)
3190{
3191 ext4_lblk_t start_blk;
3192 ext4_lblk_t len_blks;
3193 int error = 0;
3194
3195 /* fallback to generic here if not in extents fmt */
3196 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
3197 return generic_block_fiemap(inode, fieinfo, start, len,
3198 ext4_get_block);
3199
3200 if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
3201 return -EBADR;
3202
3203 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
3204 error = ext4_xattr_fiemap(inode, fieinfo);
3205 } else {
3206 start_blk = start >> inode->i_sb->s_blocksize_bits;
3207 len_blks = len >> inode->i_sb->s_blocksize_bits;
3208
3209 /*
3210 * Walk the extent tree gathering extent information.
3211 * ext4_ext_fiemap_cb will push extents back to user.
3212 */
3213 down_write(&EXT4_I(inode)->i_data_sem);
3214 error = ext4_ext_walk_space(inode, start_blk, len_blks,
3215 ext4_ext_fiemap_cb, fieinfo);
3216 up_write(&EXT4_I(inode)->i_data_sem);
3217 }
3218
3219 return error;
3220}
3221
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 430eb7978db4..6bd11fba71f7 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -31,14 +31,14 @@
31 * from ext4_file_open: open gets called at every open, but release 31 * from ext4_file_open: open gets called at every open, but release
32 * gets called only when /all/ the files are closed. 32 * gets called only when /all/ the files are closed.
33 */ 33 */
34static int ext4_release_file (struct inode * inode, struct file * filp) 34static int ext4_release_file(struct inode *inode, struct file *filp)
35{ 35{
36 /* if we are the last writer on the inode, drop the block reservation */ 36 /* if we are the last writer on the inode, drop the block reservation */
37 if ((filp->f_mode & FMODE_WRITE) && 37 if ((filp->f_mode & FMODE_WRITE) &&
38 (atomic_read(&inode->i_writecount) == 1)) 38 (atomic_read(&inode->i_writecount) == 1))
39 { 39 {
40 down_write(&EXT4_I(inode)->i_data_sem); 40 down_write(&EXT4_I(inode)->i_data_sem);
41 ext4_discard_reservation(inode); 41 ext4_discard_preallocations(inode);
42 up_write(&EXT4_I(inode)->i_data_sem); 42 up_write(&EXT4_I(inode)->i_data_sem);
43 } 43 }
44 if (is_dx(inode) && filp->private_data) 44 if (is_dx(inode) && filp->private_data)
@@ -140,6 +140,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
140 return 0; 140 return 0;
141} 141}
142 142
143extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
144 __u64 start, __u64 len);
145
143const struct file_operations ext4_file_operations = { 146const struct file_operations ext4_file_operations = {
144 .llseek = generic_file_llseek, 147 .llseek = generic_file_llseek,
145 .read = do_sync_read, 148 .read = do_sync_read,
@@ -162,7 +165,7 @@ const struct inode_operations ext4_file_inode_operations = {
162 .truncate = ext4_truncate, 165 .truncate = ext4_truncate,
163 .setattr = ext4_setattr, 166 .setattr = ext4_setattr,
164 .getattr = ext4_getattr, 167 .getattr = ext4_getattr,
165#ifdef CONFIG_EXT4DEV_FS_XATTR 168#ifdef CONFIG_EXT4_FS_XATTR
166 .setxattr = generic_setxattr, 169 .setxattr = generic_setxattr,
167 .getxattr = generic_getxattr, 170 .getxattr = generic_getxattr,
168 .listxattr = ext4_listxattr, 171 .listxattr = ext4_listxattr,
@@ -170,5 +173,6 @@ const struct inode_operations ext4_file_inode_operations = {
170#endif 173#endif
171 .permission = ext4_permission, 174 .permission = ext4_permission,
172 .fallocate = ext4_fallocate, 175 .fallocate = ext4_fallocate,
176 .fiemap = ext4_fiemap,
173}; 177};
174 178
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a45c3737ad31..5afe4370840b 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -28,6 +28,7 @@
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/blkdev.h> 30#include <linux/blkdev.h>
31#include <linux/marker.h>
31#include "ext4.h" 32#include "ext4.h"
32#include "ext4_jbd2.h" 33#include "ext4_jbd2.h"
33 34
@@ -43,7 +44,7 @@
43 * inode to disk. 44 * inode to disk.
44 */ 45 */
45 46
46int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) 47int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
47{ 48{
48 struct inode *inode = dentry->d_inode; 49 struct inode *inode = dentry->d_inode;
49 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 50 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
@@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
51 52
52 J_ASSERT(ext4_journal_current_handle() == NULL); 53 J_ASSERT(ext4_journal_current_handle() == NULL);
53 54
55 trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
56 inode->i_sb->s_id, datasync, inode->i_ino,
57 dentry->d_parent->d_inode->i_ino);
58
54 /* 59 /*
55 * data=writeback: 60 * data=writeback:
56 * The caller's filemap_fdatawrite()/wait will sync the data. 61 * The caller's filemap_fdatawrite()/wait will sync the data.
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 1d6329dbe390..556ca8eba3db 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
27 sum += DELTA; 27 sum += DELTA;
28 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); 28 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
29 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); 29 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
30 } while(--n); 30 } while (--n);
31 31
32 buf[0] += b0; 32 buf[0] += b0;
33 buf[1] += b1; 33 buf[1] += b1;
@@ -35,7 +35,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
35 35
36 36
37/* The old legacy hash */ 37/* The old legacy hash */
38static __u32 dx_hack_hash (const char *name, int len) 38static __u32 dx_hack_hash(const char *name, int len)
39{ 39{
40 __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; 40 __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
41 while (len--) { 41 while (len--) {
@@ -59,7 +59,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
59 val = pad; 59 val = pad;
60 if (len > num*4) 60 if (len > num*4)
61 len = num * 4; 61 len = num * 4;
62 for (i=0; i < len; i++) { 62 for (i = 0; i < len; i++) {
63 if ((i % 4) == 0) 63 if ((i % 4) == 0)
64 val = pad; 64 val = pad;
65 val = msg[i] + (val << 8); 65 val = msg[i] + (val << 8);
@@ -104,7 +104,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
104 104
105 /* Check to see if the seed is all zero's */ 105 /* Check to see if the seed is all zero's */
106 if (hinfo->seed) { 106 if (hinfo->seed) {
107 for (i=0; i < 4; i++) { 107 for (i = 0; i < 4; i++) {
108 if (hinfo->seed[i]) 108 if (hinfo->seed[i])
109 break; 109 break;
110 } 110 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f344834bbf58..fe34d74cfb19 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -115,9 +115,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
115 block_group, bitmap_blk); 115 block_group, bitmap_blk);
116 return NULL; 116 return NULL;
117 } 117 }
118 if (bh_uptodate_or_lock(bh)) 118 if (buffer_uptodate(bh) &&
119 !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
119 return bh; 120 return bh;
120 121
122 lock_buffer(bh);
121 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 123 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
122 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 124 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
123 ext4_init_inode_bitmap(sb, bh, block_group, desc); 125 ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -154,39 +156,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
154 * though), and then we'd have two inodes sharing the 156 * though), and then we'd have two inodes sharing the
155 * same inode number and space on the harddisk. 157 * same inode number and space on the harddisk.
156 */ 158 */
157void ext4_free_inode (handle_t *handle, struct inode * inode) 159void ext4_free_inode(handle_t *handle, struct inode *inode)
158{ 160{
159 struct super_block * sb = inode->i_sb; 161 struct super_block *sb = inode->i_sb;
160 int is_directory; 162 int is_directory;
161 unsigned long ino; 163 unsigned long ino;
162 struct buffer_head *bitmap_bh = NULL; 164 struct buffer_head *bitmap_bh = NULL;
163 struct buffer_head *bh2; 165 struct buffer_head *bh2;
164 ext4_group_t block_group; 166 ext4_group_t block_group;
165 unsigned long bit; 167 unsigned long bit;
166 struct ext4_group_desc * gdp; 168 struct ext4_group_desc *gdp;
167 struct ext4_super_block * es; 169 struct ext4_super_block *es;
168 struct ext4_sb_info *sbi; 170 struct ext4_sb_info *sbi;
169 int fatal = 0, err; 171 int fatal = 0, err;
170 ext4_group_t flex_group; 172 ext4_group_t flex_group;
171 173
172 if (atomic_read(&inode->i_count) > 1) { 174 if (atomic_read(&inode->i_count) > 1) {
173 printk ("ext4_free_inode: inode has count=%d\n", 175 printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
174 atomic_read(&inode->i_count)); 176 atomic_read(&inode->i_count));
175 return; 177 return;
176 } 178 }
177 if (inode->i_nlink) { 179 if (inode->i_nlink) {
178 printk ("ext4_free_inode: inode has nlink=%d\n", 180 printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
179 inode->i_nlink); 181 inode->i_nlink);
180 return; 182 return;
181 } 183 }
182 if (!sb) { 184 if (!sb) {
183 printk("ext4_free_inode: inode on nonexistent device\n"); 185 printk(KERN_ERR "ext4_free_inode: inode on "
186 "nonexistent device\n");
184 return; 187 return;
185 } 188 }
186 sbi = EXT4_SB(sb); 189 sbi = EXT4_SB(sb);
187 190
188 ino = inode->i_ino; 191 ino = inode->i_ino;
189 ext4_debug ("freeing inode %lu\n", ino); 192 ext4_debug("freeing inode %lu\n", ino);
190 193
191 /* 194 /*
192 * Note: we must free any quota before locking the superblock, 195 * Note: we must free any quota before locking the superblock,
@@ -200,12 +203,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
200 is_directory = S_ISDIR(inode->i_mode); 203 is_directory = S_ISDIR(inode->i_mode);
201 204
202 /* Do this BEFORE marking the inode not in use or returning an error */ 205 /* Do this BEFORE marking the inode not in use or returning an error */
203 clear_inode (inode); 206 clear_inode(inode);
204 207
205 es = EXT4_SB(sb)->s_es; 208 es = EXT4_SB(sb)->s_es;
206 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { 209 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
207 ext4_error (sb, "ext4_free_inode", 210 ext4_error(sb, "ext4_free_inode",
208 "reserved or nonexistent inode %lu", ino); 211 "reserved or nonexistent inode %lu", ino);
209 goto error_return; 212 goto error_return;
210 } 213 }
211 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 214 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
@@ -222,10 +225,10 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
222 /* Ok, now we can actually update the inode bitmaps.. */ 225 /* Ok, now we can actually update the inode bitmaps.. */
223 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 226 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
224 bit, bitmap_bh->b_data)) 227 bit, bitmap_bh->b_data))
225 ext4_error (sb, "ext4_free_inode", 228 ext4_error(sb, "ext4_free_inode",
226 "bit already cleared for inode %lu", ino); 229 "bit already cleared for inode %lu", ino);
227 else { 230 else {
228 gdp = ext4_get_group_desc (sb, block_group, &bh2); 231 gdp = ext4_get_group_desc(sb, block_group, &bh2);
229 232
230 BUFFER_TRACE(bh2, "get_write_access"); 233 BUFFER_TRACE(bh2, "get_write_access");
231 fatal = ext4_journal_get_write_access(handle, bh2); 234 fatal = ext4_journal_get_write_access(handle, bh2);
@@ -287,7 +290,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
287 avefreei = freei / ngroups; 290 avefreei = freei / ngroups;
288 291
289 for (group = 0; group < ngroups; group++) { 292 for (group = 0; group < ngroups; group++) {
290 desc = ext4_get_group_desc (sb, group, NULL); 293 desc = ext4_get_group_desc(sb, group, NULL);
291 if (!desc || !desc->bg_free_inodes_count) 294 if (!desc || !desc->bg_free_inodes_count)
292 continue; 295 continue;
293 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) 296 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
@@ -576,16 +579,16 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
576 * For other inodes, search forward from the parent directory's block 579 * For other inodes, search forward from the parent directory's block
577 * group to find a free inode. 580 * group to find a free inode.
578 */ 581 */
579struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) 582struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
580{ 583{
581 struct super_block *sb; 584 struct super_block *sb;
582 struct buffer_head *bitmap_bh = NULL; 585 struct buffer_head *bitmap_bh = NULL;
583 struct buffer_head *bh2; 586 struct buffer_head *bh2;
584 ext4_group_t group = 0; 587 ext4_group_t group = 0;
585 unsigned long ino = 0; 588 unsigned long ino = 0;
586 struct inode * inode; 589 struct inode *inode;
587 struct ext4_group_desc * gdp = NULL; 590 struct ext4_group_desc *gdp = NULL;
588 struct ext4_super_block * es; 591 struct ext4_super_block *es;
589 struct ext4_inode_info *ei; 592 struct ext4_inode_info *ei;
590 struct ext4_sb_info *sbi; 593 struct ext4_sb_info *sbi;
591 int ret2, err = 0; 594 int ret2, err = 0;
@@ -613,7 +616,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
613 } 616 }
614 617
615 if (S_ISDIR(mode)) { 618 if (S_ISDIR(mode)) {
616 if (test_opt (sb, OLDALLOC)) 619 if (test_opt(sb, OLDALLOC))
617 ret2 = find_group_dir(sb, dir, &group); 620 ret2 = find_group_dir(sb, dir, &group);
618 else 621 else
619 ret2 = find_group_orlov(sb, dir, &group); 622 ret2 = find_group_orlov(sb, dir, &group);
@@ -783,7 +786,7 @@ got:
783 } 786 }
784 787
785 inode->i_uid = current->fsuid; 788 inode->i_uid = current->fsuid;
786 if (test_opt (sb, GRPID)) 789 if (test_opt(sb, GRPID))
787 inode->i_gid = dir->i_gid; 790 inode->i_gid = dir->i_gid;
788 else if (dir->i_mode & S_ISGID) { 791 else if (dir->i_mode & S_ISGID) {
789 inode->i_gid = dir->i_gid; 792 inode->i_gid = dir->i_gid;
@@ -816,7 +819,6 @@ got:
816 ei->i_flags &= ~EXT4_DIRSYNC_FL; 819 ei->i_flags &= ~EXT4_DIRSYNC_FL;
817 ei->i_file_acl = 0; 820 ei->i_file_acl = 0;
818 ei->i_dtime = 0; 821 ei->i_dtime = 0;
819 ei->i_block_alloc_info = NULL;
820 ei->i_block_group = group; 822 ei->i_block_group = group;
821 823
822 ext4_set_inode_flags(inode); 824 ext4_set_inode_flags(inode);
@@ -832,7 +834,7 @@ got:
832 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; 834 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
833 835
834 ret = inode; 836 ret = inode;
835 if(DQUOT_ALLOC_INODE(inode)) { 837 if (DQUOT_ALLOC_INODE(inode)) {
836 err = -EDQUOT; 838 err = -EDQUOT;
837 goto fail_drop; 839 goto fail_drop;
838 } 840 }
@@ -841,7 +843,7 @@ got:
841 if (err) 843 if (err)
842 goto fail_free_drop; 844 goto fail_free_drop;
843 845
844 err = ext4_init_security(handle,inode, dir); 846 err = ext4_init_security(handle, inode, dir);
845 if (err) 847 if (err)
846 goto fail_free_drop; 848 goto fail_free_drop;
847 849
@@ -959,7 +961,7 @@ error:
959 return ERR_PTR(err); 961 return ERR_PTR(err);
960} 962}
961 963
962unsigned long ext4_count_free_inodes (struct super_block * sb) 964unsigned long ext4_count_free_inodes(struct super_block *sb)
963{ 965{
964 unsigned long desc_count; 966 unsigned long desc_count;
965 struct ext4_group_desc *gdp; 967 struct ext4_group_desc *gdp;
@@ -974,7 +976,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
974 bitmap_count = 0; 976 bitmap_count = 0;
975 gdp = NULL; 977 gdp = NULL;
976 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 978 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
977 gdp = ext4_get_group_desc (sb, i, NULL); 979 gdp = ext4_get_group_desc(sb, i, NULL);
978 if (!gdp) 980 if (!gdp)
979 continue; 981 continue;
980 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 982 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
@@ -989,13 +991,14 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
989 bitmap_count += x; 991 bitmap_count += x;
990 } 992 }
991 brelse(bitmap_bh); 993 brelse(bitmap_bh);
992 printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", 994 printk(KERN_DEBUG "ext4_count_free_inodes: "
993 le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); 995 "stored = %u, computed = %lu, %lu\n",
996 le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
994 return desc_count; 997 return desc_count;
995#else 998#else
996 desc_count = 0; 999 desc_count = 0;
997 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1000 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
998 gdp = ext4_get_group_desc (sb, i, NULL); 1001 gdp = ext4_get_group_desc(sb, i, NULL);
999 if (!gdp) 1002 if (!gdp)
1000 continue; 1003 continue;
1001 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 1004 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
@@ -1006,13 +1009,13 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
1006} 1009}
1007 1010
1008/* Called at mount-time, super-block is locked */ 1011/* Called at mount-time, super-block is locked */
1009unsigned long ext4_count_dirs (struct super_block * sb) 1012unsigned long ext4_count_dirs(struct super_block * sb)
1010{ 1013{
1011 unsigned long count = 0; 1014 unsigned long count = 0;
1012 ext4_group_t i; 1015 ext4_group_t i;
1013 1016
1014 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1017 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
1015 struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); 1018 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1016 if (!gdp) 1019 if (!gdp)
1017 continue; 1020 continue;
1018 count += le16_to_cpu(gdp->bg_used_dirs_count); 1021 count += le16_to_cpu(gdp->bg_used_dirs_count);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7e91913e325b..9b4ec9decfd1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -190,7 +190,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
190/* 190/*
191 * Called at the last iput() if i_nlink is zero. 191 * Called at the last iput() if i_nlink is zero.
192 */ 192 */
193void ext4_delete_inode (struct inode * inode) 193void ext4_delete_inode(struct inode *inode)
194{ 194{
195 handle_t *handle; 195 handle_t *handle;
196 int err; 196 int err;
@@ -330,11 +330,11 @@ static int ext4_block_to_path(struct inode *inode,
330 int final = 0; 330 int final = 0;
331 331
332 if (i_block < 0) { 332 if (i_block < 0) {
333 ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); 333 ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0");
334 } else if (i_block < direct_blocks) { 334 } else if (i_block < direct_blocks) {
335 offsets[n++] = i_block; 335 offsets[n++] = i_block;
336 final = direct_blocks; 336 final = direct_blocks;
337 } else if ( (i_block -= direct_blocks) < indirect_blocks) { 337 } else if ((i_block -= direct_blocks) < indirect_blocks) {
338 offsets[n++] = EXT4_IND_BLOCK; 338 offsets[n++] = EXT4_IND_BLOCK;
339 offsets[n++] = i_block; 339 offsets[n++] = i_block;
340 final = ptrs; 340 final = ptrs;
@@ -400,14 +400,14 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
400 400
401 *err = 0; 401 *err = 0;
402 /* i_data is not going away, no lock needed */ 402 /* i_data is not going away, no lock needed */
403 add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); 403 add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
404 if (!p->key) 404 if (!p->key)
405 goto no_block; 405 goto no_block;
406 while (--depth) { 406 while (--depth) {
407 bh = sb_bread(sb, le32_to_cpu(p->key)); 407 bh = sb_bread(sb, le32_to_cpu(p->key));
408 if (!bh) 408 if (!bh)
409 goto failure; 409 goto failure;
410 add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); 410 add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
411 /* Reader: end */ 411 /* Reader: end */
412 if (!p->key) 412 if (!p->key)
413 goto no_block; 413 goto no_block;
@@ -443,7 +443,7 @@ no_block:
443static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) 443static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
444{ 444{
445 struct ext4_inode_info *ei = EXT4_I(inode); 445 struct ext4_inode_info *ei = EXT4_I(inode);
446 __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; 446 __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
447 __le32 *p; 447 __le32 *p;
448 ext4_fsblk_t bg_start; 448 ext4_fsblk_t bg_start;
449 ext4_fsblk_t last_block; 449 ext4_fsblk_t last_block;
@@ -486,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
486static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, 486static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
487 Indirect *partial) 487 Indirect *partial)
488{ 488{
489 struct ext4_block_alloc_info *block_i;
490
491 block_i = EXT4_I(inode)->i_block_alloc_info;
492
493 /* 489 /*
494 * try the heuristic for sequential allocation, 490 * XXX need to get goal block from mballoc's data structures
495 * failing that at least try to get decent locality.
496 */ 491 */
497 if (block_i && (block == block_i->last_alloc_logical_block + 1)
498 && (block_i->last_alloc_physical_block != 0)) {
499 return block_i->last_alloc_physical_block + 1;
500 }
501 492
502 return ext4_find_near(inode, partial); 493 return ext4_find_near(inode, partial);
503} 494}
@@ -630,7 +621,7 @@ allocated:
630 *err = 0; 621 *err = 0;
631 return ret; 622 return ret;
632failed_out: 623failed_out:
633 for (i = 0; i <index; i++) 624 for (i = 0; i < index; i++)
634 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 625 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
635 return ret; 626 return ret;
636} 627}
@@ -703,7 +694,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
703 branch[n].p = (__le32 *) bh->b_data + offsets[n]; 694 branch[n].p = (__le32 *) bh->b_data + offsets[n];
704 branch[n].key = cpu_to_le32(new_blocks[n]); 695 branch[n].key = cpu_to_le32(new_blocks[n]);
705 *branch[n].p = branch[n].key; 696 *branch[n].p = branch[n].key;
706 if ( n == indirect_blks) { 697 if (n == indirect_blks) {
707 current_block = new_blocks[n]; 698 current_block = new_blocks[n];
708 /* 699 /*
709 * End of chain, update the last new metablock of 700 * End of chain, update the last new metablock of
@@ -730,7 +721,7 @@ failed:
730 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); 721 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
731 ext4_journal_forget(handle, branch[i].bh); 722 ext4_journal_forget(handle, branch[i].bh);
732 } 723 }
733 for (i = 0; i <indirect_blks; i++) 724 for (i = 0; i < indirect_blks; i++)
734 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 725 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
735 726
736 ext4_free_blocks(handle, inode, new_blocks[i], num, 0); 727 ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
@@ -757,10 +748,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
757{ 748{
758 int i; 749 int i;
759 int err = 0; 750 int err = 0;
760 struct ext4_block_alloc_info *block_i;
761 ext4_fsblk_t current_block; 751 ext4_fsblk_t current_block;
762 752
763 block_i = EXT4_I(inode)->i_block_alloc_info;
764 /* 753 /*
765 * If we're splicing into a [td]indirect block (as opposed to the 754 * If we're splicing into a [td]indirect block (as opposed to the
766 * inode) then we need to get write access to the [td]indirect block 755 * inode) then we need to get write access to the [td]indirect block
@@ -783,18 +772,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
783 if (num == 0 && blks > 1) { 772 if (num == 0 && blks > 1) {
784 current_block = le32_to_cpu(where->key) + 1; 773 current_block = le32_to_cpu(where->key) + 1;
785 for (i = 1; i < blks; i++) 774 for (i = 1; i < blks; i++)
786 *(where->p + i ) = cpu_to_le32(current_block++); 775 *(where->p + i) = cpu_to_le32(current_block++);
787 }
788
789 /*
790 * update the most recently allocated logical & physical block
791 * in i_block_alloc_info, to assist find the proper goal block for next
792 * allocation
793 */
794 if (block_i) {
795 block_i->last_alloc_logical_block = block + blks - 1;
796 block_i->last_alloc_physical_block =
797 le32_to_cpu(where[num].key) + blks - 1;
798 } 776 }
799 777
800 /* We are done with atomic stuff, now do the rest of housekeeping */ 778 /* We are done with atomic stuff, now do the rest of housekeeping */
@@ -914,12 +892,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
914 goto cleanup; 892 goto cleanup;
915 893
916 /* 894 /*
917 * Okay, we need to do block allocation. Lazily initialize the block 895 * Okay, we need to do block allocation.
918 * allocation info here if necessary
919 */ 896 */
920 if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
921 ext4_init_block_alloc_info(inode);
922
923 goal = ext4_find_goal(inode, iblock, partial); 897 goal = ext4_find_goal(inode, iblock, partial);
924 898
925 /* the number of blocks need to allocate for [d,t]indirect blocks */ 899 /* the number of blocks need to allocate for [d,t]indirect blocks */
@@ -1030,19 +1004,20 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1030 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1004 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1031 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1005 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1032 1006
1033 /* Account for allocated meta_blocks */ 1007 if (mdb_free) {
1034 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; 1008 /* Account for allocated meta_blocks */
1009 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1035 1010
1036 /* update fs free blocks counter for truncate case */ 1011 /* update fs dirty blocks counter */
1037 percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); 1012 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1013 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1014 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1015 }
1038 1016
1039 /* update per-inode reservations */ 1017 /* update per-inode reservations */
1040 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); 1018 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1041 EXT4_I(inode)->i_reserved_data_blocks -= used; 1019 EXT4_I(inode)->i_reserved_data_blocks -= used;
1042 1020
1043 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1044 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1045 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1046 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1021 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1047} 1022}
1048 1023
@@ -1160,8 +1135,8 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1160/* Maximum number of blocks we map for direct IO at once. */ 1135/* Maximum number of blocks we map for direct IO at once. */
1161#define DIO_MAX_BLOCKS 4096 1136#define DIO_MAX_BLOCKS 4096
1162 1137
1163static int ext4_get_block(struct inode *inode, sector_t iblock, 1138int ext4_get_block(struct inode *inode, sector_t iblock,
1164 struct buffer_head *bh_result, int create) 1139 struct buffer_head *bh_result, int create)
1165{ 1140{
1166 handle_t *handle = ext4_journal_current_handle(); 1141 handle_t *handle = ext4_journal_current_handle();
1167 int ret = 0, started = 0; 1142 int ret = 0, started = 0;
@@ -1241,7 +1216,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
1241 BUFFER_TRACE(bh, "call get_create_access"); 1216 BUFFER_TRACE(bh, "call get_create_access");
1242 fatal = ext4_journal_get_create_access(handle, bh); 1217 fatal = ext4_journal_get_create_access(handle, bh);
1243 if (!fatal && !buffer_uptodate(bh)) { 1218 if (!fatal && !buffer_uptodate(bh)) {
1244 memset(bh->b_data,0,inode->i_sb->s_blocksize); 1219 memset(bh->b_data, 0, inode->i_sb->s_blocksize);
1245 set_buffer_uptodate(bh); 1220 set_buffer_uptodate(bh);
1246 } 1221 }
1247 unlock_buffer(bh); 1222 unlock_buffer(bh);
@@ -1266,7 +1241,7 @@ err:
1266struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, 1241struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
1267 ext4_lblk_t block, int create, int *err) 1242 ext4_lblk_t block, int create, int *err)
1268{ 1243{
1269 struct buffer_head * bh; 1244 struct buffer_head *bh;
1270 1245
1271 bh = ext4_getblk(handle, inode, block, create, err); 1246 bh = ext4_getblk(handle, inode, block, create, err);
1272 if (!bh) 1247 if (!bh)
@@ -1282,13 +1257,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
1282 return NULL; 1257 return NULL;
1283} 1258}
1284 1259
1285static int walk_page_buffers( handle_t *handle, 1260static int walk_page_buffers(handle_t *handle,
1286 struct buffer_head *head, 1261 struct buffer_head *head,
1287 unsigned from, 1262 unsigned from,
1288 unsigned to, 1263 unsigned to,
1289 int *partial, 1264 int *partial,
1290 int (*fn)( handle_t *handle, 1265 int (*fn)(handle_t *handle,
1291 struct buffer_head *bh)) 1266 struct buffer_head *bh))
1292{ 1267{
1293 struct buffer_head *bh; 1268 struct buffer_head *bh;
1294 unsigned block_start, block_end; 1269 unsigned block_start, block_end;
@@ -1296,9 +1271,9 @@ static int walk_page_buffers( handle_t *handle,
1296 int err, ret = 0; 1271 int err, ret = 0;
1297 struct buffer_head *next; 1272 struct buffer_head *next;
1298 1273
1299 for ( bh = head, block_start = 0; 1274 for (bh = head, block_start = 0;
1300 ret == 0 && (bh != head || !block_start); 1275 ret == 0 && (bh != head || !block_start);
1301 block_start = block_end, bh = next) 1276 block_start = block_end, bh = next)
1302 { 1277 {
1303 next = bh->b_this_page; 1278 next = bh->b_this_page;
1304 block_end = block_start + blocksize; 1279 block_end = block_start + blocksize;
@@ -1351,23 +1326,23 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1351 loff_t pos, unsigned len, unsigned flags, 1326 loff_t pos, unsigned len, unsigned flags,
1352 struct page **pagep, void **fsdata) 1327 struct page **pagep, void **fsdata)
1353{ 1328{
1354 struct inode *inode = mapping->host; 1329 struct inode *inode = mapping->host;
1355 int ret, needed_blocks = ext4_writepage_trans_blocks(inode); 1330 int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
1356 handle_t *handle; 1331 handle_t *handle;
1357 int retries = 0; 1332 int retries = 0;
1358 struct page *page; 1333 struct page *page;
1359 pgoff_t index; 1334 pgoff_t index;
1360 unsigned from, to; 1335 unsigned from, to;
1361 1336
1362 index = pos >> PAGE_CACHE_SHIFT; 1337 index = pos >> PAGE_CACHE_SHIFT;
1363 from = pos & (PAGE_CACHE_SIZE - 1); 1338 from = pos & (PAGE_CACHE_SIZE - 1);
1364 to = from + len; 1339 to = from + len;
1365 1340
1366retry: 1341retry:
1367 handle = ext4_journal_start(inode, needed_blocks); 1342 handle = ext4_journal_start(inode, needed_blocks);
1368 if (IS_ERR(handle)) { 1343 if (IS_ERR(handle)) {
1369 ret = PTR_ERR(handle); 1344 ret = PTR_ERR(handle);
1370 goto out; 1345 goto out;
1371 } 1346 }
1372 1347
1373 page = __grab_cache_page(mapping, index); 1348 page = __grab_cache_page(mapping, index);
@@ -1387,9 +1362,16 @@ retry:
1387 } 1362 }
1388 1363
1389 if (ret) { 1364 if (ret) {
1390 unlock_page(page); 1365 unlock_page(page);
1391 ext4_journal_stop(handle); 1366 ext4_journal_stop(handle);
1392 page_cache_release(page); 1367 page_cache_release(page);
1368 /*
1369 * block_write_begin may have instantiated a few blocks
1370 * outside i_size. Trim these off again. Don't need
1371 * i_size_read because we hold i_mutex.
1372 */
1373 if (pos + len > inode->i_size)
1374 vmtruncate(inode, inode->i_size);
1393 } 1375 }
1394 1376
1395 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 1377 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -1426,16 +1408,18 @@ static int ext4_ordered_write_end(struct file *file,
1426 ret = ext4_jbd2_file_inode(handle, inode); 1408 ret = ext4_jbd2_file_inode(handle, inode);
1427 1409
1428 if (ret == 0) { 1410 if (ret == 0) {
1429 /*
1430 * generic_write_end() will run mark_inode_dirty() if i_size
1431 * changes. So let's piggyback the i_disksize mark_inode_dirty
1432 * into that.
1433 */
1434 loff_t new_i_size; 1411 loff_t new_i_size;
1435 1412
1436 new_i_size = pos + copied; 1413 new_i_size = pos + copied;
1437 if (new_i_size > EXT4_I(inode)->i_disksize) 1414 if (new_i_size > EXT4_I(inode)->i_disksize) {
1438 EXT4_I(inode)->i_disksize = new_i_size; 1415 ext4_update_i_disksize(inode, new_i_size);
1416 /* We need to mark inode dirty even if
1417 * new_i_size is less that inode->i_size
1418 * bu greater than i_disksize.(hint delalloc)
1419 */
1420 ext4_mark_inode_dirty(handle, inode);
1421 }
1422
1439 ret2 = generic_write_end(file, mapping, pos, len, copied, 1423 ret2 = generic_write_end(file, mapping, pos, len, copied,
1440 page, fsdata); 1424 page, fsdata);
1441 copied = ret2; 1425 copied = ret2;
@@ -1460,8 +1444,14 @@ static int ext4_writeback_write_end(struct file *file,
1460 loff_t new_i_size; 1444 loff_t new_i_size;
1461 1445
1462 new_i_size = pos + copied; 1446 new_i_size = pos + copied;
1463 if (new_i_size > EXT4_I(inode)->i_disksize) 1447 if (new_i_size > EXT4_I(inode)->i_disksize) {
1464 EXT4_I(inode)->i_disksize = new_i_size; 1448 ext4_update_i_disksize(inode, new_i_size);
1449 /* We need to mark inode dirty even if
1450 * new_i_size is less that inode->i_size
1451 * bu greater than i_disksize.(hint delalloc)
1452 */
1453 ext4_mark_inode_dirty(handle, inode);
1454 }
1465 1455
1466 ret2 = generic_write_end(file, mapping, pos, len, copied, 1456 ret2 = generic_write_end(file, mapping, pos, len, copied,
1467 page, fsdata); 1457 page, fsdata);
@@ -1486,6 +1476,7 @@ static int ext4_journalled_write_end(struct file *file,
1486 int ret = 0, ret2; 1476 int ret = 0, ret2;
1487 int partial = 0; 1477 int partial = 0;
1488 unsigned from, to; 1478 unsigned from, to;
1479 loff_t new_i_size;
1489 1480
1490 from = pos & (PAGE_CACHE_SIZE - 1); 1481 from = pos & (PAGE_CACHE_SIZE - 1);
1491 to = from + len; 1482 to = from + len;
@@ -1500,11 +1491,12 @@ static int ext4_journalled_write_end(struct file *file,
1500 to, &partial, write_end_fn); 1491 to, &partial, write_end_fn);
1501 if (!partial) 1492 if (!partial)
1502 SetPageUptodate(page); 1493 SetPageUptodate(page);
1503 if (pos+copied > inode->i_size) 1494 new_i_size = pos + copied;
1495 if (new_i_size > inode->i_size)
1504 i_size_write(inode, pos+copied); 1496 i_size_write(inode, pos+copied);
1505 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 1497 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
1506 if (inode->i_size > EXT4_I(inode)->i_disksize) { 1498 if (new_i_size > EXT4_I(inode)->i_disksize) {
1507 EXT4_I(inode)->i_disksize = inode->i_size; 1499 ext4_update_i_disksize(inode, new_i_size);
1508 ret2 = ext4_mark_inode_dirty(handle, inode); 1500 ret2 = ext4_mark_inode_dirty(handle, inode);
1509 if (!ret) 1501 if (!ret)
1510 ret = ret2; 1502 ret = ret2;
@@ -1521,6 +1513,7 @@ static int ext4_journalled_write_end(struct file *file,
1521 1513
1522static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1514static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1523{ 1515{
1516 int retries = 0;
1524 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1517 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1525 unsigned long md_needed, mdblocks, total = 0; 1518 unsigned long md_needed, mdblocks, total = 0;
1526 1519
@@ -1529,6 +1522,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1529 * in order to allocate nrblocks 1522 * in order to allocate nrblocks
1530 * worse case is one extent per block 1523 * worse case is one extent per block
1531 */ 1524 */
1525repeat:
1532 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1526 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1533 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; 1527 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
1534 mdblocks = ext4_calc_metadata_amount(inode, total); 1528 mdblocks = ext4_calc_metadata_amount(inode, total);
@@ -1537,13 +1531,14 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1537 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; 1531 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1538 total = md_needed + nrblocks; 1532 total = md_needed + nrblocks;
1539 1533
1540 if (ext4_has_free_blocks(sbi, total) < total) { 1534 if (ext4_claim_free_blocks(sbi, total)) {
1541 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1535 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1536 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1537 yield();
1538 goto repeat;
1539 }
1542 return -ENOSPC; 1540 return -ENOSPC;
1543 } 1541 }
1544 /* reduce fs free blocks counter */
1545 percpu_counter_sub(&sbi->s_freeblocks_counter, total);
1546
1547 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1542 EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
1548 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; 1543 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
1549 1544
@@ -1585,8 +1580,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1585 1580
1586 release = to_free + mdb_free; 1581 release = to_free + mdb_free;
1587 1582
1588 /* update fs free blocks counter for truncate case */ 1583 /* update fs dirty blocks counter for truncate case */
1589 percpu_counter_add(&sbi->s_freeblocks_counter, release); 1584 percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
1590 1585
1591 /* update per-inode reservations */ 1586 /* update per-inode reservations */
1592 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); 1587 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
@@ -1630,6 +1625,7 @@ struct mpage_da_data {
1630 struct writeback_control *wbc; 1625 struct writeback_control *wbc;
1631 int io_done; 1626 int io_done;
1632 long pages_written; 1627 long pages_written;
1628 int retval;
1633}; 1629};
1634 1630
1635/* 1631/*
@@ -1783,6 +1779,57 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
1783 unmap_underlying_metadata(bdev, bh->b_blocknr + i); 1779 unmap_underlying_metadata(bdev, bh->b_blocknr + i);
1784} 1780}
1785 1781
1782static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
1783 sector_t logical, long blk_cnt)
1784{
1785 int nr_pages, i;
1786 pgoff_t index, end;
1787 struct pagevec pvec;
1788 struct inode *inode = mpd->inode;
1789 struct address_space *mapping = inode->i_mapping;
1790
1791 index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
1792 end = (logical + blk_cnt - 1) >>
1793 (PAGE_CACHE_SHIFT - inode->i_blkbits);
1794 while (index <= end) {
1795 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
1796 if (nr_pages == 0)
1797 break;
1798 for (i = 0; i < nr_pages; i++) {
1799 struct page *page = pvec.pages[i];
1800 index = page->index;
1801 if (index > end)
1802 break;
1803 index++;
1804
1805 BUG_ON(!PageLocked(page));
1806 BUG_ON(PageWriteback(page));
1807 block_invalidatepage(page, 0);
1808 ClearPageUptodate(page);
1809 unlock_page(page);
1810 }
1811 }
1812 return;
1813}
1814
1815static void ext4_print_free_blocks(struct inode *inode)
1816{
1817 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1818 printk(KERN_EMERG "Total free blocks count %lld\n",
1819 ext4_count_free_blocks(inode->i_sb));
1820 printk(KERN_EMERG "Free/Dirty block details\n");
1821 printk(KERN_EMERG "free_blocks=%lld\n",
1822 percpu_counter_sum(&sbi->s_freeblocks_counter));
1823 printk(KERN_EMERG "dirty_blocks=%lld\n",
1824 percpu_counter_sum(&sbi->s_dirtyblocks_counter));
1825 printk(KERN_EMERG "Block reservation details\n");
1826 printk(KERN_EMERG "i_reserved_data_blocks=%lu\n",
1827 EXT4_I(inode)->i_reserved_data_blocks);
1828 printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n",
1829 EXT4_I(inode)->i_reserved_meta_blocks);
1830 return;
1831}
1832
1786/* 1833/*
1787 * mpage_da_map_blocks - go through given space 1834 * mpage_da_map_blocks - go through given space
1788 * 1835 *
@@ -1792,32 +1839,69 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
1792 * The function skips space we know is already mapped to disk blocks. 1839 * The function skips space we know is already mapped to disk blocks.
1793 * 1840 *
1794 */ 1841 */
1795static void mpage_da_map_blocks(struct mpage_da_data *mpd) 1842static int mpage_da_map_blocks(struct mpage_da_data *mpd)
1796{ 1843{
1797 int err = 0; 1844 int err = 0;
1798 struct buffer_head *lbh = &mpd->lbh;
1799 sector_t next = lbh->b_blocknr;
1800 struct buffer_head new; 1845 struct buffer_head new;
1846 struct buffer_head *lbh = &mpd->lbh;
1847 sector_t next;
1801 1848
1802 /* 1849 /*
1803 * We consider only non-mapped and non-allocated blocks 1850 * We consider only non-mapped and non-allocated blocks
1804 */ 1851 */
1805 if (buffer_mapped(lbh) && !buffer_delay(lbh)) 1852 if (buffer_mapped(lbh) && !buffer_delay(lbh))
1806 return; 1853 return 0;
1807
1808 new.b_state = lbh->b_state; 1854 new.b_state = lbh->b_state;
1809 new.b_blocknr = 0; 1855 new.b_blocknr = 0;
1810 new.b_size = lbh->b_size; 1856 new.b_size = lbh->b_size;
1811 1857 next = lbh->b_blocknr;
1812 /* 1858 /*
1813 * If we didn't accumulate anything 1859 * If we didn't accumulate anything
1814 * to write simply return 1860 * to write simply return
1815 */ 1861 */
1816 if (!new.b_size) 1862 if (!new.b_size)
1817 return; 1863 return 0;
1818 err = mpd->get_block(mpd->inode, next, &new, 1); 1864 err = mpd->get_block(mpd->inode, next, &new, 1);
1819 if (err) 1865 if (err) {
1820 return; 1866
1867 /* If get block returns with error
1868 * we simply return. Later writepage
1869 * will redirty the page and writepages
1870 * will find the dirty page again
1871 */
1872 if (err == -EAGAIN)
1873 return 0;
1874
1875 if (err == -ENOSPC &&
1876 ext4_count_free_blocks(mpd->inode->i_sb)) {
1877 mpd->retval = err;
1878 return 0;
1879 }
1880
1881 /*
1882 * get block failure will cause us
1883 * to loop in writepages. Because
1884 * a_ops->writepage won't be able to
1885 * make progress. The page will be redirtied
1886 * by writepage and writepages will again
1887 * try to write the same.
1888 */
1889 printk(KERN_EMERG "%s block allocation failed for inode %lu "
1890 "at logical offset %llu with max blocks "
1891 "%zd with error %d\n",
1892 __func__, mpd->inode->i_ino,
1893 (unsigned long long)next,
1894 lbh->b_size >> mpd->inode->i_blkbits, err);
1895 printk(KERN_EMERG "This should not happen.!! "
1896 "Data will be lost\n");
1897 if (err == -ENOSPC) {
1898 ext4_print_free_blocks(mpd->inode);
1899 }
1900 /* invlaidate all the pages */
1901 ext4_da_block_invalidatepages(mpd, next,
1902 lbh->b_size >> mpd->inode->i_blkbits);
1903 return err;
1904 }
1821 BUG_ON(new.b_size == 0); 1905 BUG_ON(new.b_size == 0);
1822 1906
1823 if (buffer_new(&new)) 1907 if (buffer_new(&new))
@@ -1830,7 +1914,7 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1830 if (buffer_delay(lbh) || buffer_unwritten(lbh)) 1914 if (buffer_delay(lbh) || buffer_unwritten(lbh))
1831 mpage_put_bnr_to_bhs(mpd, next, &new); 1915 mpage_put_bnr_to_bhs(mpd, next, &new);
1832 1916
1833 return; 1917 return 0;
1834} 1918}
1835 1919
1836#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ 1920#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -1899,8 +1983,8 @@ flush_it:
1899 * We couldn't merge the block to our extent, so we 1983 * We couldn't merge the block to our extent, so we
1900 * need to flush current extent and start new one 1984 * need to flush current extent and start new one
1901 */ 1985 */
1902 mpage_da_map_blocks(mpd); 1986 if (mpage_da_map_blocks(mpd) == 0)
1903 mpage_da_submit_io(mpd); 1987 mpage_da_submit_io(mpd);
1904 mpd->io_done = 1; 1988 mpd->io_done = 1;
1905 return; 1989 return;
1906} 1990}
@@ -1942,8 +2026,8 @@ static int __mpage_da_writepage(struct page *page,
1942 * and start IO on them using writepage() 2026 * and start IO on them using writepage()
1943 */ 2027 */
1944 if (mpd->next_page != mpd->first_page) { 2028 if (mpd->next_page != mpd->first_page) {
1945 mpage_da_map_blocks(mpd); 2029 if (mpage_da_map_blocks(mpd) == 0)
1946 mpage_da_submit_io(mpd); 2030 mpage_da_submit_io(mpd);
1947 /* 2031 /*
1948 * skip rest of the page in the page_vec 2032 * skip rest of the page in the page_vec
1949 */ 2033 */
@@ -2018,39 +2102,36 @@ static int __mpage_da_writepage(struct page *page,
2018 */ 2102 */
2019static int mpage_da_writepages(struct address_space *mapping, 2103static int mpage_da_writepages(struct address_space *mapping,
2020 struct writeback_control *wbc, 2104 struct writeback_control *wbc,
2021 get_block_t get_block) 2105 struct mpage_da_data *mpd)
2022{ 2106{
2023 struct mpage_da_data mpd;
2024 long to_write; 2107 long to_write;
2025 int ret; 2108 int ret;
2026 2109
2027 if (!get_block) 2110 if (!mpd->get_block)
2028 return generic_writepages(mapping, wbc); 2111 return generic_writepages(mapping, wbc);
2029 2112
2030 mpd.wbc = wbc; 2113 mpd->lbh.b_size = 0;
2031 mpd.inode = mapping->host; 2114 mpd->lbh.b_state = 0;
2032 mpd.lbh.b_size = 0; 2115 mpd->lbh.b_blocknr = 0;
2033 mpd.lbh.b_state = 0; 2116 mpd->first_page = 0;
2034 mpd.lbh.b_blocknr = 0; 2117 mpd->next_page = 0;
2035 mpd.first_page = 0; 2118 mpd->io_done = 0;
2036 mpd.next_page = 0; 2119 mpd->pages_written = 0;
2037 mpd.get_block = get_block; 2120 mpd->retval = 0;
2038 mpd.io_done = 0;
2039 mpd.pages_written = 0;
2040 2121
2041 to_write = wbc->nr_to_write; 2122 to_write = wbc->nr_to_write;
2042 2123
2043 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); 2124 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
2044 2125
2045 /* 2126 /*
2046 * Handle last extent of pages 2127 * Handle last extent of pages
2047 */ 2128 */
2048 if (!mpd.io_done && mpd.next_page != mpd.first_page) { 2129 if (!mpd->io_done && mpd->next_page != mpd->first_page) {
2049 mpage_da_map_blocks(&mpd); 2130 if (mpage_da_map_blocks(mpd) == 0)
2050 mpage_da_submit_io(&mpd); 2131 mpage_da_submit_io(mpd);
2051 } 2132 }
2052 2133
2053 wbc->nr_to_write = to_write - mpd.pages_written; 2134 wbc->nr_to_write = to_write - mpd->pages_written;
2054 return ret; 2135 return ret;
2055} 2136}
2056 2137
@@ -2103,18 +2184,24 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
2103 handle_t *handle = NULL; 2184 handle_t *handle = NULL;
2104 2185
2105 handle = ext4_journal_current_handle(); 2186 handle = ext4_journal_current_handle();
2106 if (!handle) { 2187 BUG_ON(!handle);
2107 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, 2188 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
2108 bh_result, 0, 0, 0); 2189 bh_result, create, 0, EXT4_DELALLOC_RSVED);
2109 BUG_ON(!ret);
2110 } else {
2111 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
2112 bh_result, create, 0, EXT4_DELALLOC_RSVED);
2113 }
2114
2115 if (ret > 0) { 2190 if (ret > 0) {
2191
2116 bh_result->b_size = (ret << inode->i_blkbits); 2192 bh_result->b_size = (ret << inode->i_blkbits);
2117 2193
2194 if (ext4_should_order_data(inode)) {
2195 int retval;
2196 retval = ext4_jbd2_file_inode(handle, inode);
2197 if (retval)
2198 /*
2199 * Failed to add inode for ordered
2200 * mode. Don't update file size
2201 */
2202 return retval;
2203 }
2204
2118 /* 2205 /*
2119 * Update on-disk size along with block allocation 2206 * Update on-disk size along with block allocation
2120 * we don't use 'extend_disksize' as size may change 2207 * we don't use 'extend_disksize' as size may change
@@ -2124,18 +2211,9 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
2124 if (disksize > i_size_read(inode)) 2211 if (disksize > i_size_read(inode))
2125 disksize = i_size_read(inode); 2212 disksize = i_size_read(inode);
2126 if (disksize > EXT4_I(inode)->i_disksize) { 2213 if (disksize > EXT4_I(inode)->i_disksize) {
2127 /* 2214 ext4_update_i_disksize(inode, disksize);
2128 * XXX: replace with spinlock if seen contended -bzzz 2215 ret = ext4_mark_inode_dirty(handle, inode);
2129 */ 2216 return ret;
2130 down_write(&EXT4_I(inode)->i_data_sem);
2131 if (disksize > EXT4_I(inode)->i_disksize)
2132 EXT4_I(inode)->i_disksize = disksize;
2133 up_write(&EXT4_I(inode)->i_data_sem);
2134
2135 if (EXT4_I(inode)->i_disksize == disksize) {
2136 ret = ext4_mark_inode_dirty(handle, inode);
2137 return ret;
2138 }
2139 } 2217 }
2140 ret = 0; 2218 ret = 0;
2141 } 2219 }
@@ -2284,6 +2362,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2284{ 2362{
2285 handle_t *handle = NULL; 2363 handle_t *handle = NULL;
2286 loff_t range_start = 0; 2364 loff_t range_start = 0;
2365 struct mpage_da_data mpd;
2287 struct inode *inode = mapping->host; 2366 struct inode *inode = mapping->host;
2288 int needed_blocks, ret = 0, nr_to_writebump = 0; 2367 int needed_blocks, ret = 0, nr_to_writebump = 0;
2289 long to_write, pages_skipped = 0; 2368 long to_write, pages_skipped = 0;
@@ -2317,6 +2396,9 @@ static int ext4_da_writepages(struct address_space *mapping,
2317 range_start = wbc->range_start; 2396 range_start = wbc->range_start;
2318 pages_skipped = wbc->pages_skipped; 2397 pages_skipped = wbc->pages_skipped;
2319 2398
2399 mpd.wbc = wbc;
2400 mpd.inode = mapping->host;
2401
2320restart_loop: 2402restart_loop:
2321 to_write = wbc->nr_to_write; 2403 to_write = wbc->nr_to_write;
2322 while (!ret && to_write > 0) { 2404 while (!ret && to_write > 0) {
@@ -2340,23 +2422,17 @@ restart_loop:
2340 dump_stack(); 2422 dump_stack();
2341 goto out_writepages; 2423 goto out_writepages;
2342 } 2424 }
2343 if (ext4_should_order_data(inode)) {
2344 /*
2345 * With ordered mode we need to add
2346 * the inode to the journal handl
2347 * when we do block allocation.
2348 */
2349 ret = ext4_jbd2_file_inode(handle, inode);
2350 if (ret) {
2351 ext4_journal_stop(handle);
2352 goto out_writepages;
2353 }
2354 }
2355
2356 to_write -= wbc->nr_to_write; 2425 to_write -= wbc->nr_to_write;
2357 ret = mpage_da_writepages(mapping, wbc, 2426
2358 ext4_da_get_block_write); 2427 mpd.get_block = ext4_da_get_block_write;
2428 ret = mpage_da_writepages(mapping, wbc, &mpd);
2429
2359 ext4_journal_stop(handle); 2430 ext4_journal_stop(handle);
2431
2432 if (mpd.retval == -ENOSPC)
2433 jbd2_journal_force_commit_nested(sbi->s_journal);
2434
2435 /* reset the retry count */
2360 if (ret == MPAGE_DA_EXTENT_TAIL) { 2436 if (ret == MPAGE_DA_EXTENT_TAIL) {
2361 /* 2437 /*
2362 * got one extent now try with 2438 * got one extent now try with
@@ -2391,6 +2467,33 @@ out_writepages:
2391 return ret; 2467 return ret;
2392} 2468}
2393 2469
2470#define FALL_BACK_TO_NONDELALLOC 1
2471static int ext4_nonda_switch(struct super_block *sb)
2472{
2473 s64 free_blocks, dirty_blocks;
2474 struct ext4_sb_info *sbi = EXT4_SB(sb);
2475
2476 /*
2477 * switch to non delalloc mode if we are running low
2478 * on free block. The free block accounting via percpu
2479 * counters can get slightly wrong with FBC_BATCH getting
2480 * accumulated on each CPU without updating global counters
2481 * Delalloc need an accurate free block accounting. So switch
2482 * to non delalloc when we are near to error range.
2483 */
2484 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
2485 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
2486 if (2 * free_blocks < 3 * dirty_blocks ||
2487 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
2488 /*
2489 * free block count is less that 150% of dirty blocks
2490 * or free blocks is less that watermark
2491 */
2492 return 1;
2493 }
2494 return 0;
2495}
2496
2394static int ext4_da_write_begin(struct file *file, struct address_space *mapping, 2497static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2395 loff_t pos, unsigned len, unsigned flags, 2498 loff_t pos, unsigned len, unsigned flags,
2396 struct page **pagep, void **fsdata) 2499 struct page **pagep, void **fsdata)
@@ -2406,6 +2509,12 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2406 from = pos & (PAGE_CACHE_SIZE - 1); 2509 from = pos & (PAGE_CACHE_SIZE - 1);
2407 to = from + len; 2510 to = from + len;
2408 2511
2512 if (ext4_nonda_switch(inode->i_sb)) {
2513 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
2514 return ext4_write_begin(file, mapping, pos,
2515 len, flags, pagep, fsdata);
2516 }
2517 *fsdata = (void *)0;
2409retry: 2518retry:
2410 /* 2519 /*
2411 * With delayed allocation, we don't log the i_disksize update 2520 * With delayed allocation, we don't log the i_disksize update
@@ -2433,6 +2542,13 @@ retry:
2433 unlock_page(page); 2542 unlock_page(page);
2434 ext4_journal_stop(handle); 2543 ext4_journal_stop(handle);
2435 page_cache_release(page); 2544 page_cache_release(page);
2545 /*
2546 * block_write_begin may have instantiated a few blocks
2547 * outside i_size. Trim these off again. Don't need
2548 * i_size_read because we hold i_mutex.
2549 */
2550 if (pos + len > inode->i_size)
2551 vmtruncate(inode, inode->i_size);
2436 } 2552 }
2437 2553
2438 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2554 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2456,7 +2572,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
2456 bh = page_buffers(page); 2572 bh = page_buffers(page);
2457 idx = offset >> inode->i_blkbits; 2573 idx = offset >> inode->i_blkbits;
2458 2574
2459 for (i=0; i < idx; i++) 2575 for (i = 0; i < idx; i++)
2460 bh = bh->b_this_page; 2576 bh = bh->b_this_page;
2461 2577
2462 if (!buffer_mapped(bh) || (buffer_delay(bh))) 2578 if (!buffer_mapped(bh) || (buffer_delay(bh)))
@@ -2474,9 +2590,22 @@ static int ext4_da_write_end(struct file *file,
2474 handle_t *handle = ext4_journal_current_handle(); 2590 handle_t *handle = ext4_journal_current_handle();
2475 loff_t new_i_size; 2591 loff_t new_i_size;
2476 unsigned long start, end; 2592 unsigned long start, end;
2593 int write_mode = (int)(unsigned long)fsdata;
2594
2595 if (write_mode == FALL_BACK_TO_NONDELALLOC) {
2596 if (ext4_should_order_data(inode)) {
2597 return ext4_ordered_write_end(file, mapping, pos,
2598 len, copied, page, fsdata);
2599 } else if (ext4_should_writeback_data(inode)) {
2600 return ext4_writeback_write_end(file, mapping, pos,
2601 len, copied, page, fsdata);
2602 } else {
2603 BUG();
2604 }
2605 }
2477 2606
2478 start = pos & (PAGE_CACHE_SIZE - 1); 2607 start = pos & (PAGE_CACHE_SIZE - 1);
2479 end = start + copied -1; 2608 end = start + copied - 1;
2480 2609
2481 /* 2610 /*
2482 * generic_write_end() will run mark_inode_dirty() if i_size 2611 * generic_write_end() will run mark_inode_dirty() if i_size
@@ -2500,6 +2629,11 @@ static int ext4_da_write_end(struct file *file,
2500 EXT4_I(inode)->i_disksize = new_i_size; 2629 EXT4_I(inode)->i_disksize = new_i_size;
2501 } 2630 }
2502 up_write(&EXT4_I(inode)->i_data_sem); 2631 up_write(&EXT4_I(inode)->i_data_sem);
2632 /* We need to mark inode dirty even if
2633 * new_i_size is less that inode->i_size
2634 * bu greater than i_disksize.(hint delalloc)
2635 */
2636 ext4_mark_inode_dirty(handle, inode);
2503 } 2637 }
2504 } 2638 }
2505 ret2 = generic_write_end(file, mapping, pos, len, copied, 2639 ret2 = generic_write_end(file, mapping, pos, len, copied,
@@ -2591,7 +2725,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
2591 return 0; 2725 return 0;
2592 } 2726 }
2593 2727
2594 return generic_block_bmap(mapping,block,ext4_get_block); 2728 return generic_block_bmap(mapping, block, ext4_get_block);
2595} 2729}
2596 2730
2597static int bget_one(handle_t *handle, struct buffer_head *bh) 2731static int bget_one(handle_t *handle, struct buffer_head *bh)
@@ -3197,7 +3331,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
3197 if (!partial->key && *partial->p) 3331 if (!partial->key && *partial->p)
3198 /* Writer: end */ 3332 /* Writer: end */
3199 goto no_top; 3333 goto no_top;
3200 for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) 3334 for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
3201 ; 3335 ;
3202 /* 3336 /*
3203 * OK, we've found the last block that must survive. The rest of our 3337 * OK, we've found the last block that must survive. The rest of our
@@ -3216,7 +3350,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
3216 } 3350 }
3217 /* Writer: end */ 3351 /* Writer: end */
3218 3352
3219 while(partial > p) { 3353 while (partial > p) {
3220 brelse(partial->bh); 3354 brelse(partial->bh);
3221 partial--; 3355 partial--;
3222 } 3356 }
@@ -3408,9 +3542,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
3408 /* This zaps the entire block. Bottom up. */ 3542 /* This zaps the entire block. Bottom up. */
3409 BUFFER_TRACE(bh, "free child branches"); 3543 BUFFER_TRACE(bh, "free child branches");
3410 ext4_free_branches(handle, inode, bh, 3544 ext4_free_branches(handle, inode, bh,
3411 (__le32*)bh->b_data, 3545 (__le32 *) bh->b_data,
3412 (__le32*)bh->b_data + addr_per_block, 3546 (__le32 *) bh->b_data + addr_per_block,
3413 depth); 3547 depth);
3414 3548
3415 /* 3549 /*
3416 * We've probably journalled the indirect block several 3550 * We've probably journalled the indirect block several
@@ -3578,7 +3712,7 @@ void ext4_truncate(struct inode *inode)
3578 */ 3712 */
3579 down_write(&ei->i_data_sem); 3713 down_write(&ei->i_data_sem);
3580 3714
3581 ext4_discard_reservation(inode); 3715 ext4_discard_preallocations(inode);
3582 3716
3583 /* 3717 /*
3584 * The orphan list entry will now protect us from any crash which 3718 * The orphan list entry will now protect us from any crash which
@@ -3673,41 +3807,6 @@ out_stop:
3673 ext4_journal_stop(handle); 3807 ext4_journal_stop(handle);
3674} 3808}
3675 3809
3676static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
3677 unsigned long ino, struct ext4_iloc *iloc)
3678{
3679 ext4_group_t block_group;
3680 unsigned long offset;
3681 ext4_fsblk_t block;
3682 struct ext4_group_desc *gdp;
3683
3684 if (!ext4_valid_inum(sb, ino)) {
3685 /*
3686 * This error is already checked for in namei.c unless we are
3687 * looking at an NFS filehandle, in which case no error
3688 * report is needed
3689 */
3690 return 0;
3691 }
3692
3693 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
3694 gdp = ext4_get_group_desc(sb, block_group, NULL);
3695 if (!gdp)
3696 return 0;
3697
3698 /*
3699 * Figure out the offset within the block group inode table
3700 */
3701 offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
3702 EXT4_INODE_SIZE(sb);
3703 block = ext4_inode_table(sb, gdp) +
3704 (offset >> EXT4_BLOCK_SIZE_BITS(sb));
3705
3706 iloc->block_group = block_group;
3707 iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
3708 return block;
3709}
3710
3711/* 3810/*
3712 * ext4_get_inode_loc returns with an extra refcount against the inode's 3811 * ext4_get_inode_loc returns with an extra refcount against the inode's
3713 * underlying buffer_head on success. If 'in_mem' is true, we have all 3812 * underlying buffer_head on success. If 'in_mem' is true, we have all
@@ -3717,19 +3816,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
3717static int __ext4_get_inode_loc(struct inode *inode, 3816static int __ext4_get_inode_loc(struct inode *inode,
3718 struct ext4_iloc *iloc, int in_mem) 3817 struct ext4_iloc *iloc, int in_mem)
3719{ 3818{
3720 ext4_fsblk_t block; 3819 struct ext4_group_desc *gdp;
3721 struct buffer_head *bh; 3820 struct buffer_head *bh;
3821 struct super_block *sb = inode->i_sb;
3822 ext4_fsblk_t block;
3823 int inodes_per_block, inode_offset;
3824
3825 iloc->bh = 0;
3826 if (!ext4_valid_inum(sb, inode->i_ino))
3827 return -EIO;
3722 3828
3723 block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); 3829 iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
3724 if (!block) 3830 gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
3831 if (!gdp)
3725 return -EIO; 3832 return -EIO;
3726 3833
3727 bh = sb_getblk(inode->i_sb, block); 3834 /*
3835 * Figure out the offset within the block group inode table
3836 */
3837 inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
3838 inode_offset = ((inode->i_ino - 1) %
3839 EXT4_INODES_PER_GROUP(sb));
3840 block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
3841 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
3842
3843 bh = sb_getblk(sb, block);
3728 if (!bh) { 3844 if (!bh) {
3729 ext4_error (inode->i_sb, "ext4_get_inode_loc", 3845 ext4_error(sb, "ext4_get_inode_loc", "unable to read "
3730 "unable to read inode block - " 3846 "inode block - inode=%lu, block=%llu",
3731 "inode=%lu, block=%llu", 3847 inode->i_ino, block);
3732 inode->i_ino, block);
3733 return -EIO; 3848 return -EIO;
3734 } 3849 }
3735 if (!buffer_uptodate(bh)) { 3850 if (!buffer_uptodate(bh)) {
@@ -3757,28 +3872,12 @@ static int __ext4_get_inode_loc(struct inode *inode,
3757 */ 3872 */
3758 if (in_mem) { 3873 if (in_mem) {
3759 struct buffer_head *bitmap_bh; 3874 struct buffer_head *bitmap_bh;
3760 struct ext4_group_desc *desc; 3875 int i, start;
3761 int inodes_per_buffer;
3762 int inode_offset, i;
3763 ext4_group_t block_group;
3764 int start;
3765
3766 block_group = (inode->i_ino - 1) /
3767 EXT4_INODES_PER_GROUP(inode->i_sb);
3768 inodes_per_buffer = bh->b_size /
3769 EXT4_INODE_SIZE(inode->i_sb);
3770 inode_offset = ((inode->i_ino - 1) %
3771 EXT4_INODES_PER_GROUP(inode->i_sb));
3772 start = inode_offset & ~(inodes_per_buffer - 1);
3773 3876
3774 /* Is the inode bitmap in cache? */ 3877 start = inode_offset & ~(inodes_per_block - 1);
3775 desc = ext4_get_group_desc(inode->i_sb,
3776 block_group, NULL);
3777 if (!desc)
3778 goto make_io;
3779 3878
3780 bitmap_bh = sb_getblk(inode->i_sb, 3879 /* Is the inode bitmap in cache? */
3781 ext4_inode_bitmap(inode->i_sb, desc)); 3880 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
3782 if (!bitmap_bh) 3881 if (!bitmap_bh)
3783 goto make_io; 3882 goto make_io;
3784 3883
@@ -3791,14 +3890,14 @@ static int __ext4_get_inode_loc(struct inode *inode,
3791 brelse(bitmap_bh); 3890 brelse(bitmap_bh);
3792 goto make_io; 3891 goto make_io;
3793 } 3892 }
3794 for (i = start; i < start + inodes_per_buffer; i++) { 3893 for (i = start; i < start + inodes_per_block; i++) {
3795 if (i == inode_offset) 3894 if (i == inode_offset)
3796 continue; 3895 continue;
3797 if (ext4_test_bit(i, bitmap_bh->b_data)) 3896 if (ext4_test_bit(i, bitmap_bh->b_data))
3798 break; 3897 break;
3799 } 3898 }
3800 brelse(bitmap_bh); 3899 brelse(bitmap_bh);
3801 if (i == start + inodes_per_buffer) { 3900 if (i == start + inodes_per_block) {
3802 /* all other inodes are free, so skip I/O */ 3901 /* all other inodes are free, so skip I/O */
3803 memset(bh->b_data, 0, bh->b_size); 3902 memset(bh->b_data, 0, bh->b_size);
3804 set_buffer_uptodate(bh); 3903 set_buffer_uptodate(bh);
@@ -3809,6 +3908,36 @@ static int __ext4_get_inode_loc(struct inode *inode,
3809 3908
3810make_io: 3909make_io:
3811 /* 3910 /*
3911 * If we need to do any I/O, try to pre-readahead extra
3912 * blocks from the inode table.
3913 */
3914 if (EXT4_SB(sb)->s_inode_readahead_blks) {
3915 ext4_fsblk_t b, end, table;
3916 unsigned num;
3917
3918 table = ext4_inode_table(sb, gdp);
3919 /* Make sure s_inode_readahead_blks is a power of 2 */
3920 while (EXT4_SB(sb)->s_inode_readahead_blks &
3921 (EXT4_SB(sb)->s_inode_readahead_blks-1))
3922 EXT4_SB(sb)->s_inode_readahead_blks =
3923 (EXT4_SB(sb)->s_inode_readahead_blks &
3924 (EXT4_SB(sb)->s_inode_readahead_blks-1));
3925 b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
3926 if (table > b)
3927 b = table;
3928 end = b + EXT4_SB(sb)->s_inode_readahead_blks;
3929 num = EXT4_INODES_PER_GROUP(sb);
3930 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3931 EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
3932 num -= le16_to_cpu(gdp->bg_itable_unused);
3933 table += num / inodes_per_block;
3934 if (end > table)
3935 end = table;
3936 while (b <= end)
3937 sb_breadahead(sb, b++);
3938 }
3939
3940 /*
3812 * There are other valid inodes in the buffer, this inode 3941 * There are other valid inodes in the buffer, this inode
3813 * has in-inode xattrs, or we don't have this inode in memory. 3942 * has in-inode xattrs, or we don't have this inode in memory.
3814 * Read the block from disk. 3943 * Read the block from disk.
@@ -3818,10 +3947,9 @@ make_io:
3818 submit_bh(READ_META, bh); 3947 submit_bh(READ_META, bh);
3819 wait_on_buffer(bh); 3948 wait_on_buffer(bh);
3820 if (!buffer_uptodate(bh)) { 3949 if (!buffer_uptodate(bh)) {
3821 ext4_error(inode->i_sb, "ext4_get_inode_loc", 3950 ext4_error(sb, __func__,
3822 "unable to read inode block - " 3951 "unable to read inode block - inode=%lu, "
3823 "inode=%lu, block=%llu", 3952 "block=%llu", inode->i_ino, block);
3824 inode->i_ino, block);
3825 brelse(bh); 3953 brelse(bh);
3826 return -EIO; 3954 return -EIO;
3827 } 3955 }
@@ -3913,11 +4041,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3913 return inode; 4041 return inode;
3914 4042
3915 ei = EXT4_I(inode); 4043 ei = EXT4_I(inode);
3916#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 4044#ifdef CONFIG_EXT4_FS_POSIX_ACL
3917 ei->i_acl = EXT4_ACL_NOT_CACHED; 4045 ei->i_acl = EXT4_ACL_NOT_CACHED;
3918 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 4046 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
3919#endif 4047#endif
3920 ei->i_block_alloc_info = NULL;
3921 4048
3922 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4049 ret = __ext4_get_inode_loc(inode, &iloc, 0);
3923 if (ret < 0) 4050 if (ret < 0)
@@ -3927,7 +4054,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3927 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 4054 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
3928 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 4055 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
3929 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); 4056 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
3930 if(!(test_opt (inode->i_sb, NO_UID32))) { 4057 if (!(test_opt(inode->i_sb, NO_UID32))) {
3931 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 4058 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3932 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 4059 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3933 } 4060 }
@@ -3945,7 +4072,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3945 if (inode->i_mode == 0 || 4072 if (inode->i_mode == 0 ||
3946 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { 4073 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
3947 /* this inode is deleted */ 4074 /* this inode is deleted */
3948 brelse (bh); 4075 brelse(bh);
3949 ret = -ESTALE; 4076 ret = -ESTALE;
3950 goto bad_inode; 4077 goto bad_inode;
3951 } 4078 }
@@ -3978,7 +4105,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3978 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 4105 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
3979 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 4106 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
3980 EXT4_INODE_SIZE(inode->i_sb)) { 4107 EXT4_INODE_SIZE(inode->i_sb)) {
3981 brelse (bh); 4108 brelse(bh);
3982 ret = -EIO; 4109 ret = -EIO;
3983 goto bad_inode; 4110 goto bad_inode;
3984 } 4111 }
@@ -4031,7 +4158,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4031 init_special_inode(inode, inode->i_mode, 4158 init_special_inode(inode, inode->i_mode,
4032 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 4159 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4033 } 4160 }
4034 brelse (iloc.bh); 4161 brelse(iloc.bh);
4035 ext4_set_inode_flags(inode); 4162 ext4_set_inode_flags(inode);
4036 unlock_new_inode(inode); 4163 unlock_new_inode(inode);
4037 return inode; 4164 return inode;
@@ -4113,14 +4240,14 @@ static int ext4_do_update_inode(handle_t *handle,
4113 4240
4114 ext4_get_inode_flags(ei); 4241 ext4_get_inode_flags(ei);
4115 raw_inode->i_mode = cpu_to_le16(inode->i_mode); 4242 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
4116 if(!(test_opt(inode->i_sb, NO_UID32))) { 4243 if (!(test_opt(inode->i_sb, NO_UID32))) {
4117 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); 4244 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
4118 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); 4245 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
4119/* 4246/*
4120 * Fix up interoperability with old kernels. Otherwise, old inodes get 4247 * Fix up interoperability with old kernels. Otherwise, old inodes get
4121 * re-used with the upper 16 bits of the uid/gid intact 4248 * re-used with the upper 16 bits of the uid/gid intact
4122 */ 4249 */
4123 if(!ei->i_dtime) { 4250 if (!ei->i_dtime) {
4124 raw_inode->i_uid_high = 4251 raw_inode->i_uid_high =
4125 cpu_to_le16(high_16_bits(inode->i_uid)); 4252 cpu_to_le16(high_16_bits(inode->i_uid));
4126 raw_inode->i_gid_high = 4253 raw_inode->i_gid_high =
@@ -4208,7 +4335,7 @@ static int ext4_do_update_inode(handle_t *handle,
4208 ei->i_state &= ~EXT4_STATE_NEW; 4335 ei->i_state &= ~EXT4_STATE_NEW;
4209 4336
4210out_brelse: 4337out_brelse:
4211 brelse (bh); 4338 brelse(bh);
4212 ext4_std_error(inode->i_sb, err); 4339 ext4_std_error(inode->i_sb, err);
4213 return err; 4340 return err;
4214} 4341}
@@ -4811,6 +4938,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
4811 loff_t size; 4938 loff_t size;
4812 unsigned long len; 4939 unsigned long len;
4813 int ret = -EINVAL; 4940 int ret = -EINVAL;
4941 void *fsdata;
4814 struct file *file = vma->vm_file; 4942 struct file *file = vma->vm_file;
4815 struct inode *inode = file->f_path.dentry->d_inode; 4943 struct inode *inode = file->f_path.dentry->d_inode;
4816 struct address_space *mapping = inode->i_mapping; 4944 struct address_space *mapping = inode->i_mapping;
@@ -4849,11 +4977,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
4849 * on the same page though 4977 * on the same page though
4850 */ 4978 */
4851 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), 4979 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
4852 len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); 4980 len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
4853 if (ret < 0) 4981 if (ret < 0)
4854 goto out_unlock; 4982 goto out_unlock;
4855 ret = mapping->a_ops->write_end(file, mapping, page_offset(page), 4983 ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
4856 len, len, page, NULL); 4984 len, len, page, fsdata);
4857 if (ret < 0) 4985 if (ret < 0)
4858 goto out_unlock; 4986 goto out_unlock;
4859 ret = 0; 4987 ret = 0;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7a6c2f1faba6..dc99b4776d58 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
23 struct inode *inode = filp->f_dentry->d_inode; 23 struct inode *inode = filp->f_dentry->d_inode;
24 struct ext4_inode_info *ei = EXT4_I(inode); 24 struct ext4_inode_info *ei = EXT4_I(inode);
25 unsigned int flags; 25 unsigned int flags;
26 unsigned short rsv_window_size;
27 26
28 ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); 27 ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
29 28
30 switch (cmd) { 29 switch (cmd) {
31 case EXT4_IOC_GETFLAGS: 30 case EXT4_IOC_GETFLAGS:
@@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
34 return put_user(flags, (int __user *) arg); 33 return put_user(flags, (int __user *) arg);
35 case EXT4_IOC_SETFLAGS: { 34 case EXT4_IOC_SETFLAGS: {
36 handle_t *handle = NULL; 35 handle_t *handle = NULL;
37 int err; 36 int err, migrate = 0;
38 struct ext4_iloc iloc; 37 struct ext4_iloc iloc;
39 unsigned int oldflags; 38 unsigned int oldflags;
40 unsigned int jflag; 39 unsigned int jflag;
@@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
82 if (!capable(CAP_SYS_RESOURCE)) 81 if (!capable(CAP_SYS_RESOURCE))
83 goto flags_out; 82 goto flags_out;
84 } 83 }
84 if (oldflags & EXT4_EXTENTS_FL) {
85 /* We don't support clearning extent flags */
86 if (!(flags & EXT4_EXTENTS_FL)) {
87 err = -EOPNOTSUPP;
88 goto flags_out;
89 }
90 } else if (flags & EXT4_EXTENTS_FL) {
91 /* migrate the file */
92 migrate = 1;
93 flags &= ~EXT4_EXTENTS_FL;
94 }
85 95
86 handle = ext4_journal_start(inode, 1); 96 handle = ext4_journal_start(inode, 1);
87 if (IS_ERR(handle)) { 97 if (IS_ERR(handle)) {
@@ -109,6 +119,10 @@ flags_err:
109 119
110 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) 120 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
111 err = ext4_change_inode_journal_flag(inode, jflag); 121 err = ext4_change_inode_journal_flag(inode, jflag);
122 if (err)
123 goto flags_out;
124 if (migrate)
125 err = ext4_ext_migrate(inode);
112flags_out: 126flags_out:
113 mutex_unlock(&inode->i_mutex); 127 mutex_unlock(&inode->i_mutex);
114 mnt_drop_write(filp->f_path.mnt); 128 mnt_drop_write(filp->f_path.mnt);
@@ -175,53 +189,10 @@ setversion_out:
175 return ret; 189 return ret;
176 } 190 }
177#endif 191#endif
178 case EXT4_IOC_GETRSVSZ:
179 if (test_opt(inode->i_sb, RESERVATION)
180 && S_ISREG(inode->i_mode)
181 && ei->i_block_alloc_info) {
182 rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size;
183 return put_user(rsv_window_size, (int __user *)arg);
184 }
185 return -ENOTTY;
186 case EXT4_IOC_SETRSVSZ: {
187 int err;
188
189 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
190 return -ENOTTY;
191
192 if (!is_owner_or_cap(inode))
193 return -EACCES;
194
195 if (get_user(rsv_window_size, (int __user *)arg))
196 return -EFAULT;
197
198 err = mnt_want_write(filp->f_path.mnt);
199 if (err)
200 return err;
201
202 if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
203 rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
204
205 /*
206 * need to allocate reservation structure for this inode
207 * before set the window size
208 */
209 down_write(&ei->i_data_sem);
210 if (!ei->i_block_alloc_info)
211 ext4_init_block_alloc_info(inode);
212
213 if (ei->i_block_alloc_info){
214 struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
215 rsv->rsv_goal_size = rsv_window_size;
216 }
217 up_write(&ei->i_data_sem);
218 mnt_drop_write(filp->f_path.mnt);
219 return 0;
220 }
221 case EXT4_IOC_GROUP_EXTEND: { 192 case EXT4_IOC_GROUP_EXTEND: {
222 ext4_fsblk_t n_blocks_count; 193 ext4_fsblk_t n_blocks_count;
223 struct super_block *sb = inode->i_sb; 194 struct super_block *sb = inode->i_sb;
224 int err; 195 int err, err2;
225 196
226 if (!capable(CAP_SYS_RESOURCE)) 197 if (!capable(CAP_SYS_RESOURCE))
227 return -EPERM; 198 return -EPERM;
@@ -235,8 +206,10 @@ setversion_out:
235 206
236 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 207 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
237 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 208 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
238 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 209 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
239 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 210 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
211 if (err == 0)
212 err = err2;
240 mnt_drop_write(filp->f_path.mnt); 213 mnt_drop_write(filp->f_path.mnt);
241 214
242 return err; 215 return err;
@@ -244,7 +217,7 @@ setversion_out:
244 case EXT4_IOC_GROUP_ADD: { 217 case EXT4_IOC_GROUP_ADD: {
245 struct ext4_new_group_data input; 218 struct ext4_new_group_data input;
246 struct super_block *sb = inode->i_sb; 219 struct super_block *sb = inode->i_sb;
247 int err; 220 int err, err2;
248 221
249 if (!capable(CAP_SYS_RESOURCE)) 222 if (!capable(CAP_SYS_RESOURCE))
250 return -EPERM; 223 return -EPERM;
@@ -259,15 +232,36 @@ setversion_out:
259 232
260 err = ext4_group_add(sb, &input); 233 err = ext4_group_add(sb, &input);
261 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 234 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
262 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 235 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
263 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 236 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
237 if (err == 0)
238 err = err2;
264 mnt_drop_write(filp->f_path.mnt); 239 mnt_drop_write(filp->f_path.mnt);
265 240
266 return err; 241 return err;
267 } 242 }
268 243
269 case EXT4_IOC_MIGRATE: 244 case EXT4_IOC_MIGRATE:
270 return ext4_ext_migrate(inode, filp, cmd, arg); 245 {
246 int err;
247 if (!is_owner_or_cap(inode))
248 return -EACCES;
249
250 err = mnt_want_write(filp->f_path.mnt);
251 if (err)
252 return err;
253 /*
254 * inode_mutex prevent write and truncate on the file.
255 * Read still goes through. We take i_data_sem in
256 * ext4_ext_swap_inode_data before we switch the
257 * inode format to prevent read.
258 */
259 mutex_lock(&(inode->i_mutex));
260 err = ext4_ext_migrate(inode);
261 mutex_unlock(&(inode->i_mutex));
262 mnt_drop_write(filp->f_path.mnt);
263 return err;
264 }
271 265
272 default: 266 default:
273 return -ENOTTY; 267 return -ENOTTY;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e0e3a5eb1ddb..b580714f0d85 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
477 b2 = (unsigned char *) bitmap; 477 b2 = (unsigned char *) bitmap;
478 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { 478 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
479 if (b1[i] != b2[i]) { 479 if (b1[i] != b2[i]) {
480 printk("corruption in group %lu at byte %u(%u):" 480 printk(KERN_ERR "corruption in group %lu "
481 " %x in copy != %x on disk/prealloc\n", 481 "at byte %u(%u): %x in copy != %x "
482 e4b->bd_group, i, i * 8, b1[i], b2[i]); 482 "on disk/prealloc\n",
483 e4b->bd_group, i, i * 8, b1[i], b2[i]);
483 BUG(); 484 BUG();
484 } 485 }
485 } 486 }
@@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
533 void *buddy; 534 void *buddy;
534 void *buddy2; 535 void *buddy2;
535 536
536 if (!test_opt(sb, MBALLOC))
537 return 0;
538
539 { 537 {
540 static int mb_check_counter; 538 static int mb_check_counter;
541 if (mb_check_counter++ % 100 != 0) 539 if (mb_check_counter++ % 100 != 0)
@@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
784 if (bh[i] == NULL) 782 if (bh[i] == NULL)
785 goto out; 783 goto out;
786 784
787 if (bh_uptodate_or_lock(bh[i])) 785 if (buffer_uptodate(bh[i]) &&
786 !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
788 continue; 787 continue;
789 788
789 lock_buffer(bh[i]);
790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
792 ext4_init_block_bitmap(sb, bh[i], 792 ext4_init_block_bitmap(sb, bh[i],
@@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb)
2169{ 2169{
2170 struct ext4_sb_info *sbi = EXT4_SB(sb); 2170 struct ext4_sb_info *sbi = EXT4_SB(sb);
2171 2171
2172 remove_proc_entry("mb_groups", sbi->s_mb_proc); 2172 if (sbi->s_proc != NULL) {
2173 remove_proc_entry("mb_history", sbi->s_mb_proc); 2173 remove_proc_entry("mb_groups", sbi->s_proc);
2174 2174 remove_proc_entry("mb_history", sbi->s_proc);
2175 }
2175 kfree(sbi->s_mb_history); 2176 kfree(sbi->s_mb_history);
2176} 2177}
2177 2178
@@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb)
2180 struct ext4_sb_info *sbi = EXT4_SB(sb); 2181 struct ext4_sb_info *sbi = EXT4_SB(sb);
2181 int i; 2182 int i;
2182 2183
2183 if (sbi->s_mb_proc != NULL) { 2184 if (sbi->s_proc != NULL) {
2184 proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, 2185 proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
2185 &ext4_mb_seq_history_fops, sb); 2186 &ext4_mb_seq_history_fops, sb);
2186 proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, 2187 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2187 &ext4_mb_seq_groups_fops, sb); 2188 &ext4_mb_seq_groups_fops, sb);
2188 } 2189 }
2189 2190
@@ -2485,19 +2486,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2485 unsigned max; 2486 unsigned max;
2486 int ret; 2487 int ret;
2487 2488
2488 if (!test_opt(sb, MBALLOC))
2489 return 0;
2490
2491 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); 2489 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short);
2492 2490
2493 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); 2491 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2494 if (sbi->s_mb_offsets == NULL) { 2492 if (sbi->s_mb_offsets == NULL) {
2495 clear_opt(sbi->s_mount_opt, MBALLOC);
2496 return -ENOMEM; 2493 return -ENOMEM;
2497 } 2494 }
2498 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); 2495 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2499 if (sbi->s_mb_maxs == NULL) { 2496 if (sbi->s_mb_maxs == NULL) {
2500 clear_opt(sbi->s_mount_opt, MBALLOC);
2501 kfree(sbi->s_mb_maxs); 2497 kfree(sbi->s_mb_maxs);
2502 return -ENOMEM; 2498 return -ENOMEM;
2503 } 2499 }
@@ -2520,7 +2516,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2520 /* init file for buddy data */ 2516 /* init file for buddy data */
2521 ret = ext4_mb_init_backend(sb); 2517 ret = ext4_mb_init_backend(sb);
2522 if (ret != 0) { 2518 if (ret != 0) {
2523 clear_opt(sbi->s_mount_opt, MBALLOC);
2524 kfree(sbi->s_mb_offsets); 2519 kfree(sbi->s_mb_offsets);
2525 kfree(sbi->s_mb_maxs); 2520 kfree(sbi->s_mb_maxs);
2526 return ret; 2521 return ret;
@@ -2540,17 +2535,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2540 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; 2535 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
2541 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2536 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2542 2537
2543 i = sizeof(struct ext4_locality_group) * nr_cpu_ids; 2538 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2544 sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
2545 if (sbi->s_locality_groups == NULL) { 2539 if (sbi->s_locality_groups == NULL) {
2546 clear_opt(sbi->s_mount_opt, MBALLOC);
2547 kfree(sbi->s_mb_offsets); 2540 kfree(sbi->s_mb_offsets);
2548 kfree(sbi->s_mb_maxs); 2541 kfree(sbi->s_mb_maxs);
2549 return -ENOMEM; 2542 return -ENOMEM;
2550 } 2543 }
2551 for (i = 0; i < nr_cpu_ids; i++) { 2544 for_each_possible_cpu(i) {
2552 struct ext4_locality_group *lg; 2545 struct ext4_locality_group *lg;
2553 lg = &sbi->s_locality_groups[i]; 2546 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2554 mutex_init(&lg->lg_mutex); 2547 mutex_init(&lg->lg_mutex);
2555 for (j = 0; j < PREALLOC_TB_SIZE; j++) 2548 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2556 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); 2549 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
@@ -2560,7 +2553,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2560 ext4_mb_init_per_dev_proc(sb); 2553 ext4_mb_init_per_dev_proc(sb);
2561 ext4_mb_history_init(sb); 2554 ext4_mb_history_init(sb);
2562 2555
2563 printk("EXT4-fs: mballoc enabled\n"); 2556 printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
2564 return 0; 2557 return 0;
2565} 2558}
2566 2559
@@ -2589,9 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
2589 struct ext4_group_info *grinfo; 2582 struct ext4_group_info *grinfo;
2590 struct ext4_sb_info *sbi = EXT4_SB(sb); 2583 struct ext4_sb_info *sbi = EXT4_SB(sb);
2591 2584
2592 if (!test_opt(sb, MBALLOC))
2593 return 0;
2594
2595 /* release freed, non-committed blocks */ 2585 /* release freed, non-committed blocks */
2596 spin_lock(&sbi->s_md_lock); 2586 spin_lock(&sbi->s_md_lock);
2597 list_splice_init(&sbi->s_closed_transaction, 2587 list_splice_init(&sbi->s_closed_transaction,
@@ -2647,8 +2637,7 @@ int ext4_mb_release(struct super_block *sb)
2647 atomic_read(&sbi->s_mb_discarded)); 2637 atomic_read(&sbi->s_mb_discarded));
2648 } 2638 }
2649 2639
2650 kfree(sbi->s_locality_groups); 2640 free_percpu(sbi->s_locality_groups);
2651
2652 ext4_mb_history_release(sb); 2641 ext4_mb_history_release(sb);
2653 ext4_mb_destroy_per_dev_proc(sb); 2642 ext4_mb_destroy_per_dev_proc(sb);
2654 2643
@@ -2721,118 +2710,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
2721#define EXT4_MB_STREAM_REQ "stream_req" 2710#define EXT4_MB_STREAM_REQ "stream_req"
2722#define EXT4_MB_GROUP_PREALLOC "group_prealloc" 2711#define EXT4_MB_GROUP_PREALLOC "group_prealloc"
2723 2712
2724
2725
2726#define MB_PROC_FOPS(name) \
2727static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \
2728{ \
2729 struct ext4_sb_info *sbi = m->private; \
2730 \
2731 seq_printf(m, "%ld\n", sbi->s_mb_##name); \
2732 return 0; \
2733} \
2734 \
2735static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
2736{ \
2737 return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
2738} \
2739 \
2740static ssize_t ext4_mb_##name##_proc_write(struct file *file, \
2741 const char __user *buf, size_t cnt, loff_t *ppos) \
2742{ \
2743 struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
2744 char str[32]; \
2745 long value; \
2746 if (cnt >= sizeof(str)) \
2747 return -EINVAL; \
2748 if (copy_from_user(str, buf, cnt)) \
2749 return -EFAULT; \
2750 value = simple_strtol(str, NULL, 0); \
2751 if (value <= 0) \
2752 return -ERANGE; \
2753 sbi->s_mb_##name = value; \
2754 return cnt; \
2755} \
2756 \
2757static const struct file_operations ext4_mb_##name##_proc_fops = { \
2758 .owner = THIS_MODULE, \
2759 .open = ext4_mb_##name##_proc_open, \
2760 .read = seq_read, \
2761 .llseek = seq_lseek, \
2762 .release = single_release, \
2763 .write = ext4_mb_##name##_proc_write, \
2764};
2765
2766MB_PROC_FOPS(stats);
2767MB_PROC_FOPS(max_to_scan);
2768MB_PROC_FOPS(min_to_scan);
2769MB_PROC_FOPS(order2_reqs);
2770MB_PROC_FOPS(stream_request);
2771MB_PROC_FOPS(group_prealloc);
2772
2773#define MB_PROC_HANDLER(name, var) \
2774do { \
2775 proc = proc_create_data(name, mode, sbi->s_mb_proc, \
2776 &ext4_mb_##var##_proc_fops, sbi); \
2777 if (proc == NULL) { \
2778 printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \
2779 goto err_out; \
2780 } \
2781} while (0)
2782
2783static int ext4_mb_init_per_dev_proc(struct super_block *sb) 2713static int ext4_mb_init_per_dev_proc(struct super_block *sb)
2784{ 2714{
2785 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; 2715 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
2786 struct ext4_sb_info *sbi = EXT4_SB(sb); 2716 struct ext4_sb_info *sbi = EXT4_SB(sb);
2787 struct proc_dir_entry *proc; 2717 struct proc_dir_entry *proc;
2788 char devname[64];
2789 2718
2790 if (proc_root_ext4 == NULL) { 2719 if (sbi->s_proc == NULL)
2791 sbi->s_mb_proc = NULL;
2792 return -EINVAL; 2720 return -EINVAL;
2793 }
2794 bdevname(sb->s_bdev, devname);
2795 sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
2796
2797 MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
2798 MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan);
2799 MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan);
2800 MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs);
2801 MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request);
2802 MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc);
2803 2721
2722 EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
2723 EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
2724 EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
2725 EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
2726 EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
2727 EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
2804 return 0; 2728 return 0;
2805 2729
2806err_out: 2730err_out:
2807 printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); 2731 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2808 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); 2732 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2809 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); 2733 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2810 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); 2734 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2811 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); 2735 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2812 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); 2736 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2813 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
2814 remove_proc_entry(devname, proc_root_ext4);
2815 sbi->s_mb_proc = NULL;
2816
2817 return -ENOMEM; 2737 return -ENOMEM;
2818} 2738}
2819 2739
2820static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) 2740static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2821{ 2741{
2822 struct ext4_sb_info *sbi = EXT4_SB(sb); 2742 struct ext4_sb_info *sbi = EXT4_SB(sb);
2823 char devname[64];
2824 2743
2825 if (sbi->s_mb_proc == NULL) 2744 if (sbi->s_proc == NULL)
2826 return -EINVAL; 2745 return -EINVAL;
2827 2746
2828 bdevname(sb->s_bdev, devname); 2747 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2829 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); 2748 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2830 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); 2749 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2831 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); 2750 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2832 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); 2751 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2833 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); 2752 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2834 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
2835 remove_proc_entry(devname, proc_root_ext4);
2836 2753
2837 return 0; 2754 return 0;
2838} 2755}
@@ -2854,11 +2771,6 @@ int __init init_ext4_mballoc(void)
2854 kmem_cache_destroy(ext4_pspace_cachep); 2771 kmem_cache_destroy(ext4_pspace_cachep);
2855 return -ENOMEM; 2772 return -ENOMEM;
2856 } 2773 }
2857#ifdef CONFIG_PROC_FS
2858 proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
2859 if (proc_root_ext4 == NULL)
2860 printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
2861#endif
2862 return 0; 2774 return 0;
2863} 2775}
2864 2776
@@ -2867,9 +2779,6 @@ void exit_ext4_mballoc(void)
2867 /* XXX: synchronize_rcu(); */ 2779 /* XXX: synchronize_rcu(); */
2868 kmem_cache_destroy(ext4_pspace_cachep); 2780 kmem_cache_destroy(ext4_pspace_cachep);
2869 kmem_cache_destroy(ext4_ac_cachep); 2781 kmem_cache_destroy(ext4_ac_cachep);
2870#ifdef CONFIG_PROC_FS
2871 remove_proc_entry("fs/ext4", NULL);
2872#endif
2873} 2782}
2874 2783
2875 2784
@@ -2879,7 +2788,7 @@ void exit_ext4_mballoc(void)
2879 */ 2788 */
2880static noinline_for_stack int 2789static noinline_for_stack int
2881ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2790ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2882 handle_t *handle) 2791 handle_t *handle, unsigned long reserv_blks)
2883{ 2792{
2884 struct buffer_head *bitmap_bh = NULL; 2793 struct buffer_head *bitmap_bh = NULL;
2885 struct ext4_super_block *es; 2794 struct ext4_super_block *es;
@@ -2968,15 +2877,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2968 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); 2877 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
2969 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2878 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
2970 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2879 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
2971 2880 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
2972 /* 2881 /*
2973 * free blocks account has already be reduced/reserved 2882 * Now reduce the dirty block count also. Should not go negative
2974 * at write_begin() time for delayed allocation
2975 * do not double accounting
2976 */ 2883 */
2977 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2884 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2978 percpu_counter_sub(&sbi->s_freeblocks_counter, 2885 /* release all the reserved blocks if non delalloc */
2979 ac->ac_b_ex.fe_len); 2886 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2887 else
2888 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
2889 ac->ac_b_ex.fe_len);
2980 2890
2981 if (sbi->s_log_groups_per_flex) { 2891 if (sbi->s_log_groups_per_flex) {
2982 ext4_group_t flex_group = ext4_flex_group(sbi, 2892 ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -3884,7 +3794,7 @@ out:
3884 * 3794 *
3885 * FIXME!! Make sure it is valid at all the call sites 3795 * FIXME!! Make sure it is valid at all the call sites
3886 */ 3796 */
3887void ext4_mb_discard_inode_preallocations(struct inode *inode) 3797void ext4_discard_preallocations(struct inode *inode)
3888{ 3798{
3889 struct ext4_inode_info *ei = EXT4_I(inode); 3799 struct ext4_inode_info *ei = EXT4_I(inode);
3890 struct super_block *sb = inode->i_sb; 3800 struct super_block *sb = inode->i_sb;
@@ -3896,7 +3806,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3896 struct ext4_buddy e4b; 3806 struct ext4_buddy e4b;
3897 int err; 3807 int err;
3898 3808
3899 if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { 3809 if (!S_ISREG(inode->i_mode)) {
3900 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ 3810 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
3901 return; 3811 return;
3902 } 3812 }
@@ -4094,8 +4004,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4094 * per cpu locality group is to reduce the contention between block 4004 * per cpu locality group is to reduce the contention between block
4095 * request from multiple CPUs. 4005 * request from multiple CPUs.
4096 */ 4006 */
4097 ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; 4007 ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());
4098 put_cpu();
4099 4008
4100 /* we're going to use group allocation */ 4009 /* we're going to use group allocation */
4101 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; 4010 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@ -4369,33 +4278,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4369ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, 4278ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4370 struct ext4_allocation_request *ar, int *errp) 4279 struct ext4_allocation_request *ar, int *errp)
4371{ 4280{
4281 int freed;
4372 struct ext4_allocation_context *ac = NULL; 4282 struct ext4_allocation_context *ac = NULL;
4373 struct ext4_sb_info *sbi; 4283 struct ext4_sb_info *sbi;
4374 struct super_block *sb; 4284 struct super_block *sb;
4375 ext4_fsblk_t block = 0; 4285 ext4_fsblk_t block = 0;
4376 int freed; 4286 unsigned long inquota;
4377 int inquota; 4287 unsigned long reserv_blks = 0;
4378 4288
4379 sb = ar->inode->i_sb; 4289 sb = ar->inode->i_sb;
4380 sbi = EXT4_SB(sb); 4290 sbi = EXT4_SB(sb);
4381 4291
4382 if (!test_opt(sb, MBALLOC)) {
4383 block = ext4_old_new_blocks(handle, ar->inode, ar->goal,
4384 &(ar->len), errp);
4385 return block;
4386 }
4387 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { 4292 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
4388 /* 4293 /*
4389 * With delalloc we already reserved the blocks 4294 * With delalloc we already reserved the blocks
4390 */ 4295 */
4391 ar->len = ext4_has_free_blocks(sbi, ar->len); 4296 while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
4392 } 4297 /* let others to free the space */
4393 4298 yield();
4394 if (ar->len == 0) { 4299 ar->len = ar->len >> 1;
4395 *errp = -ENOSPC; 4300 }
4396 return 0; 4301 if (!ar->len) {
4302 *errp = -ENOSPC;
4303 return 0;
4304 }
4305 reserv_blks = ar->len;
4397 } 4306 }
4398
4399 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { 4307 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
4400 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4308 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4401 ar->len--; 4309 ar->len--;
@@ -4441,7 +4349,7 @@ repeat:
4441 } 4349 }
4442 4350
4443 if (likely(ac->ac_status == AC_STATUS_FOUND)) { 4351 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4444 *errp = ext4_mb_mark_diskspace_used(ac, handle); 4352 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
4445 if (*errp == -EAGAIN) { 4353 if (*errp == -EAGAIN) {
4446 ac->ac_b_ex.fe_group = 0; 4354 ac->ac_b_ex.fe_group = 0;
4447 ac->ac_b_ex.fe_start = 0; 4355 ac->ac_b_ex.fe_start = 0;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index c7c9906c2a75..b3b4828f8b89 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -257,7 +257,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac);
257 257
258#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 258#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
259 259
260static struct proc_dir_entry *proc_root_ext4;
261struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); 260struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
262 261
263static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 262static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 46fc0b5b12ba..f2a9cf498ecd 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -447,8 +447,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
447 447
448} 448}
449 449
450int ext4_ext_migrate(struct inode *inode, struct file *filp, 450int ext4_ext_migrate(struct inode *inode)
451 unsigned int cmd, unsigned long arg)
452{ 451{
453 handle_t *handle; 452 handle_t *handle;
454 int retval = 0, i; 453 int retval = 0, i;
@@ -516,12 +515,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
516 * when we add extents we extent the journal 515 * when we add extents we extent the journal
517 */ 516 */
518 /* 517 /*
519 * inode_mutex prevent write and truncate on the file. Read still goes
520 * through. We take i_data_sem in ext4_ext_swap_inode_data before we
521 * switch the inode format to prevent read.
522 */
523 mutex_lock(&(inode->i_mutex));
524 /*
525 * Even though we take i_mutex we can still cause block allocation 518 * Even though we take i_mutex we can still cause block allocation
526 * via mmap write to holes. If we have allocated new blocks we fail 519 * via mmap write to holes. If we have allocated new blocks we fail
527 * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. 520 * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag.
@@ -623,7 +616,6 @@ err_out:
623 tmp_inode->i_nlink = 0; 616 tmp_inode->i_nlink = 0;
624 617
625 ext4_journal_stop(handle); 618 ext4_journal_stop(handle);
626 mutex_unlock(&(inode->i_mutex));
627 619
628 if (tmp_inode) 620 if (tmp_inode)
629 iput(tmp_inode); 621 iput(tmp_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 387ad98350c3..92db9e945147 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -151,34 +151,36 @@ struct dx_map_entry
151 151
152static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); 152static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
153static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); 153static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
154static inline unsigned dx_get_hash (struct dx_entry *entry); 154static inline unsigned dx_get_hash(struct dx_entry *entry);
155static void dx_set_hash (struct dx_entry *entry, unsigned value); 155static void dx_set_hash(struct dx_entry *entry, unsigned value);
156static unsigned dx_get_count (struct dx_entry *entries); 156static unsigned dx_get_count(struct dx_entry *entries);
157static unsigned dx_get_limit (struct dx_entry *entries); 157static unsigned dx_get_limit(struct dx_entry *entries);
158static void dx_set_count (struct dx_entry *entries, unsigned value); 158static void dx_set_count(struct dx_entry *entries, unsigned value);
159static void dx_set_limit (struct dx_entry *entries, unsigned value); 159static void dx_set_limit(struct dx_entry *entries, unsigned value);
160static unsigned dx_root_limit (struct inode *dir, unsigned infosize); 160static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
161static unsigned dx_node_limit (struct inode *dir); 161static unsigned dx_node_limit(struct inode *dir);
162static struct dx_frame *dx_probe(struct dentry *dentry, 162static struct dx_frame *dx_probe(const struct qstr *d_name,
163 struct inode *dir, 163 struct inode *dir,
164 struct dx_hash_info *hinfo, 164 struct dx_hash_info *hinfo,
165 struct dx_frame *frame, 165 struct dx_frame *frame,
166 int *err); 166 int *err);
167static void dx_release (struct dx_frame *frames); 167static void dx_release(struct dx_frame *frames);
168static int dx_make_map (struct ext4_dir_entry_2 *de, int size, 168static int dx_make_map(struct ext4_dir_entry_2 *de, int size,
169 struct dx_hash_info *hinfo, struct dx_map_entry map[]); 169 struct dx_hash_info *hinfo, struct dx_map_entry map[]);
170static void dx_sort_map(struct dx_map_entry *map, unsigned count); 170static void dx_sort_map(struct dx_map_entry *map, unsigned count);
171static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, 171static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
172 struct dx_map_entry *offsets, int count); 172 struct dx_map_entry *offsets, int count);
173static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); 173static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size);
174static void dx_insert_block(struct dx_frame *frame, 174static void dx_insert_block(struct dx_frame *frame,
175 u32 hash, ext4_lblk_t block); 175 u32 hash, ext4_lblk_t block);
176static int ext4_htree_next_block(struct inode *dir, __u32 hash, 176static int ext4_htree_next_block(struct inode *dir, __u32 hash,
177 struct dx_frame *frame, 177 struct dx_frame *frame,
178 struct dx_frame *frames, 178 struct dx_frame *frames,
179 __u32 *start_hash); 179 __u32 *start_hash);
180static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, 180static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
181 struct ext4_dir_entry_2 **res_dir, int *err); 181 const struct qstr *d_name,
182 struct ext4_dir_entry_2 **res_dir,
183 int *err);
182static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, 184static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
183 struct inode *inode); 185 struct inode *inode);
184 186
@@ -207,44 +209,44 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
207 entry->block = cpu_to_le32(value); 209 entry->block = cpu_to_le32(value);
208} 210}
209 211
210static inline unsigned dx_get_hash (struct dx_entry *entry) 212static inline unsigned dx_get_hash(struct dx_entry *entry)
211{ 213{
212 return le32_to_cpu(entry->hash); 214 return le32_to_cpu(entry->hash);
213} 215}
214 216
215static inline void dx_set_hash (struct dx_entry *entry, unsigned value) 217static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
216{ 218{
217 entry->hash = cpu_to_le32(value); 219 entry->hash = cpu_to_le32(value);
218} 220}
219 221
220static inline unsigned dx_get_count (struct dx_entry *entries) 222static inline unsigned dx_get_count(struct dx_entry *entries)
221{ 223{
222 return le16_to_cpu(((struct dx_countlimit *) entries)->count); 224 return le16_to_cpu(((struct dx_countlimit *) entries)->count);
223} 225}
224 226
225static inline unsigned dx_get_limit (struct dx_entry *entries) 227static inline unsigned dx_get_limit(struct dx_entry *entries)
226{ 228{
227 return le16_to_cpu(((struct dx_countlimit *) entries)->limit); 229 return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
228} 230}
229 231
230static inline void dx_set_count (struct dx_entry *entries, unsigned value) 232static inline void dx_set_count(struct dx_entry *entries, unsigned value)
231{ 233{
232 ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); 234 ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
233} 235}
234 236
235static inline void dx_set_limit (struct dx_entry *entries, unsigned value) 237static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
236{ 238{
237 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); 239 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
238} 240}
239 241
240static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) 242static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
241{ 243{
242 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - 244 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
243 EXT4_DIR_REC_LEN(2) - infosize; 245 EXT4_DIR_REC_LEN(2) - infosize;
244 return entry_space / sizeof(struct dx_entry); 246 return entry_space / sizeof(struct dx_entry);
245} 247}
246 248
247static inline unsigned dx_node_limit (struct inode *dir) 249static inline unsigned dx_node_limit(struct inode *dir)
248{ 250{
249 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); 251 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
250 return entry_space / sizeof(struct dx_entry); 252 return entry_space / sizeof(struct dx_entry);
@@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (struct inode *dir)
254 * Debug 256 * Debug
255 */ 257 */
256#ifdef DX_DEBUG 258#ifdef DX_DEBUG
257static void dx_show_index (char * label, struct dx_entry *entries) 259static void dx_show_index(char * label, struct dx_entry *entries)
258{ 260{
259 int i, n = dx_get_count (entries); 261 int i, n = dx_get_count (entries);
260 printk("%s index ", label); 262 printk(KERN_DEBUG "%s index ", label);
261 for (i = 0; i < n; i++) { 263 for (i = 0; i < n; i++) {
262 printk("%x->%lu ", i? dx_get_hash(entries + i) : 264 printk("%x->%lu ", i ? dx_get_hash(entries + i) :
263 0, (unsigned long)dx_get_block(entries + i)); 265 0, (unsigned long)dx_get_block(entries + i));
264 } 266 }
265 printk("\n"); 267 printk("\n");
@@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
306 struct dx_entry *entries, int levels) 308 struct dx_entry *entries, int levels)
307{ 309{
308 unsigned blocksize = dir->i_sb->s_blocksize; 310 unsigned blocksize = dir->i_sb->s_blocksize;
309 unsigned count = dx_get_count (entries), names = 0, space = 0, i; 311 unsigned count = dx_get_count(entries), names = 0, space = 0, i;
310 unsigned bcount = 0; 312 unsigned bcount = 0;
311 struct buffer_head *bh; 313 struct buffer_head *bh;
312 int err; 314 int err;
@@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
325 names += stats.names; 327 names += stats.names;
326 space += stats.space; 328 space += stats.space;
327 bcount += stats.bcount; 329 bcount += stats.bcount;
328 brelse (bh); 330 brelse(bh);
329 } 331 }
330 if (bcount) 332 if (bcount)
331 printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", 333 printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
332 names, space/bcount,(space/bcount)*100/blocksize); 334 levels ? "" : " ", names, space/bcount,
335 (space/bcount)*100/blocksize);
333 return (struct stats) { names, space, bcount}; 336 return (struct stats) { names, space, bcount};
334} 337}
335#endif /* DX_DEBUG */ 338#endif /* DX_DEBUG */
@@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
344 * back to userspace. 347 * back to userspace.
345 */ 348 */
346static struct dx_frame * 349static struct dx_frame *
347dx_probe(struct dentry *dentry, struct inode *dir, 350dx_probe(const struct qstr *d_name, struct inode *dir,
348 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) 351 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
349{ 352{
350 unsigned count, indirect; 353 unsigned count, indirect;
@@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct inode *dir,
355 u32 hash; 358 u32 hash;
356 359
357 frame->bh = NULL; 360 frame->bh = NULL;
358 if (dentry)
359 dir = dentry->d_parent->d_inode;
360 if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) 361 if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
361 goto fail; 362 goto fail;
362 root = (struct dx_root *) bh->b_data; 363 root = (struct dx_root *) bh->b_data;
@@ -372,8 +373,8 @@ dx_probe(struct dentry *dentry, struct inode *dir,
372 } 373 }
373 hinfo->hash_version = root->info.hash_version; 374 hinfo->hash_version = root->info.hash_version;
374 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; 375 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
375 if (dentry) 376 if (d_name)
376 ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); 377 ext4fs_dirhash(d_name->name, d_name->len, hinfo);
377 hash = hinfo->hash; 378 hash = hinfo->hash;
378 379
379 if (root->info.unused_flags & 1) { 380 if (root->info.unused_flags & 1) {
@@ -406,7 +407,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
406 goto fail; 407 goto fail;
407 } 408 }
408 409
409 dxtrace (printk("Look up %x", hash)); 410 dxtrace(printk("Look up %x", hash));
410 while (1) 411 while (1)
411 { 412 {
412 count = dx_get_count(entries); 413 count = dx_get_count(entries);
@@ -555,7 +556,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
555 0, &err))) 556 0, &err)))
556 return err; /* Failure */ 557 return err; /* Failure */
557 p++; 558 p++;
558 brelse (p->bh); 559 brelse(p->bh);
559 p->bh = bh; 560 p->bh = bh;
560 p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; 561 p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
561 } 562 }
@@ -593,7 +594,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
593 /* On error, skip the f_pos to the next block. */ 594 /* On error, skip the f_pos to the next block. */
594 dir_file->f_pos = (dir_file->f_pos | 595 dir_file->f_pos = (dir_file->f_pos |
595 (dir->i_sb->s_blocksize - 1)) + 1; 596 (dir->i_sb->s_blocksize - 1)) + 1;
596 brelse (bh); 597 brelse(bh);
597 return count; 598 return count;
598 } 599 }
599 ext4fs_dirhash(de->name, de->name_len, hinfo); 600 ext4fs_dirhash(de->name, de->name_len, hinfo);
@@ -635,8 +636,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
635 int ret, err; 636 int ret, err;
636 __u32 hashval; 637 __u32 hashval;
637 638
638 dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, 639 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
639 start_minor_hash)); 640 start_hash, start_minor_hash));
640 dir = dir_file->f_path.dentry->d_inode; 641 dir = dir_file->f_path.dentry->d_inode;
641 if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { 642 if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
642 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; 643 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
@@ -648,7 +649,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
648 } 649 }
649 hinfo.hash = start_hash; 650 hinfo.hash = start_hash;
650 hinfo.minor_hash = 0; 651 hinfo.minor_hash = 0;
651 frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err); 652 frame = dx_probe(NULL, dir, &hinfo, frames, &err);
652 if (!frame) 653 if (!frame)
653 return err; 654 return err;
654 655
@@ -694,8 +695,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
694 break; 695 break;
695 } 696 }
696 dx_release(frames); 697 dx_release(frames);
697 dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", 698 dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
698 count, *next_hash)); 699 "next hash: %x\n", count, *next_hash));
699 return count; 700 return count;
700errout: 701errout:
701 dx_release(frames); 702 dx_release(frames);
@@ -802,17 +803,17 @@ static inline int ext4_match (int len, const char * const name,
802/* 803/*
803 * Returns 0 if not found, -1 on failure, and 1 on success 804 * Returns 0 if not found, -1 on failure, and 1 on success
804 */ 805 */
805static inline int search_dirblock(struct buffer_head * bh, 806static inline int search_dirblock(struct buffer_head *bh,
806 struct inode *dir, 807 struct inode *dir,
807 struct dentry *dentry, 808 const struct qstr *d_name,
808 unsigned long offset, 809 unsigned long offset,
809 struct ext4_dir_entry_2 ** res_dir) 810 struct ext4_dir_entry_2 ** res_dir)
810{ 811{
811 struct ext4_dir_entry_2 * de; 812 struct ext4_dir_entry_2 * de;
812 char * dlimit; 813 char * dlimit;
813 int de_len; 814 int de_len;
814 const char *name = dentry->d_name.name; 815 const char *name = d_name->name;
815 int namelen = dentry->d_name.len; 816 int namelen = d_name->len;
816 817
817 de = (struct ext4_dir_entry_2 *) bh->b_data; 818 de = (struct ext4_dir_entry_2 *) bh->b_data;
818 dlimit = bh->b_data + dir->i_sb->s_blocksize; 819 dlimit = bh->b_data + dir->i_sb->s_blocksize;
@@ -851,12 +852,13 @@ static inline int search_dirblock(struct buffer_head * bh,
851 * The returned buffer_head has ->b_count elevated. The caller is expected 852 * The returned buffer_head has ->b_count elevated. The caller is expected
852 * to brelse() it when appropriate. 853 * to brelse() it when appropriate.
853 */ 854 */
854static struct buffer_head * ext4_find_entry (struct dentry *dentry, 855static struct buffer_head * ext4_find_entry (struct inode *dir,
856 const struct qstr *d_name,
855 struct ext4_dir_entry_2 ** res_dir) 857 struct ext4_dir_entry_2 ** res_dir)
856{ 858{
857 struct super_block * sb; 859 struct super_block *sb;
858 struct buffer_head * bh_use[NAMEI_RA_SIZE]; 860 struct buffer_head *bh_use[NAMEI_RA_SIZE];
859 struct buffer_head * bh, *ret = NULL; 861 struct buffer_head *bh, *ret = NULL;
860 ext4_lblk_t start, block, b; 862 ext4_lblk_t start, block, b;
861 int ra_max = 0; /* Number of bh's in the readahead 863 int ra_max = 0; /* Number of bh's in the readahead
862 buffer, bh_use[] */ 864 buffer, bh_use[] */
@@ -865,16 +867,15 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
865 int num = 0; 867 int num = 0;
866 ext4_lblk_t nblocks; 868 ext4_lblk_t nblocks;
867 int i, err; 869 int i, err;
868 struct inode *dir = dentry->d_parent->d_inode;
869 int namelen; 870 int namelen;
870 871
871 *res_dir = NULL; 872 *res_dir = NULL;
872 sb = dir->i_sb; 873 sb = dir->i_sb;
873 namelen = dentry->d_name.len; 874 namelen = d_name->len;
874 if (namelen > EXT4_NAME_LEN) 875 if (namelen > EXT4_NAME_LEN)
875 return NULL; 876 return NULL;
876 if (is_dx(dir)) { 877 if (is_dx(dir)) {
877 bh = ext4_dx_find_entry(dentry, res_dir, &err); 878 bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
878 /* 879 /*
879 * On success, or if the error was file not found, 880 * On success, or if the error was file not found,
880 * return. Otherwise, fall back to doing a search the 881 * return. Otherwise, fall back to doing a search the
@@ -882,7 +883,8 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
882 */ 883 */
883 if (bh || (err != ERR_BAD_DX_DIR)) 884 if (bh || (err != ERR_BAD_DX_DIR))
884 return bh; 885 return bh;
885 dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); 886 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
887 "falling back\n"));
886 } 888 }
887 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); 889 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
888 start = EXT4_I(dir)->i_dir_start_lookup; 890 start = EXT4_I(dir)->i_dir_start_lookup;
@@ -926,7 +928,7 @@ restart:
926 brelse(bh); 928 brelse(bh);
927 goto next; 929 goto next;
928 } 930 }
929 i = search_dirblock(bh, dir, dentry, 931 i = search_dirblock(bh, dir, d_name,
930 block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); 932 block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
931 if (i == 1) { 933 if (i == 1) {
932 EXT4_I(dir)->i_dir_start_lookup = block; 934 EXT4_I(dir)->i_dir_start_lookup = block;
@@ -956,11 +958,11 @@ restart:
956cleanup_and_exit: 958cleanup_and_exit:
957 /* Clean up the read-ahead blocks */ 959 /* Clean up the read-ahead blocks */
958 for (; ra_ptr < ra_max; ra_ptr++) 960 for (; ra_ptr < ra_max; ra_ptr++)
959 brelse (bh_use[ra_ptr]); 961 brelse(bh_use[ra_ptr]);
960 return ret; 962 return ret;
961} 963}
962 964
963static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, 965static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
964 struct ext4_dir_entry_2 **res_dir, int *err) 966 struct ext4_dir_entry_2 **res_dir, int *err)
965{ 967{
966 struct super_block * sb; 968 struct super_block * sb;
@@ -971,14 +973,13 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
971 struct buffer_head *bh; 973 struct buffer_head *bh;
972 ext4_lblk_t block; 974 ext4_lblk_t block;
973 int retval; 975 int retval;
974 int namelen = dentry->d_name.len; 976 int namelen = d_name->len;
975 const u8 *name = dentry->d_name.name; 977 const u8 *name = d_name->name;
976 struct inode *dir = dentry->d_parent->d_inode;
977 978
978 sb = dir->i_sb; 979 sb = dir->i_sb;
979 /* NFS may look up ".." - look at dx_root directory block */ 980 /* NFS may look up ".." - look at dx_root directory block */
980 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ 981 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
981 if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) 982 if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
982 return NULL; 983 return NULL;
983 } else { 984 } else {
984 frame = frames; 985 frame = frames;
@@ -1010,7 +1011,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
1010 return bh; 1011 return bh;
1011 } 1012 }
1012 } 1013 }
1013 brelse (bh); 1014 brelse(bh);
1014 /* Check to see if we should continue to search */ 1015 /* Check to see if we should continue to search */
1015 retval = ext4_htree_next_block(dir, hash, frame, 1016 retval = ext4_htree_next_block(dir, hash, frame,
1016 frames, NULL); 1017 frames, NULL);
@@ -1025,25 +1026,25 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
1025 1026
1026 *err = -ENOENT; 1027 *err = -ENOENT;
1027errout: 1028errout:
1028 dxtrace(printk("%s not found\n", name)); 1029 dxtrace(printk(KERN_DEBUG "%s not found\n", name));
1029 dx_release (frames); 1030 dx_release (frames);
1030 return NULL; 1031 return NULL;
1031} 1032}
1032 1033
1033static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 1034static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1034{ 1035{
1035 struct inode * inode; 1036 struct inode *inode;
1036 struct ext4_dir_entry_2 * de; 1037 struct ext4_dir_entry_2 *de;
1037 struct buffer_head * bh; 1038 struct buffer_head *bh;
1038 1039
1039 if (dentry->d_name.len > EXT4_NAME_LEN) 1040 if (dentry->d_name.len > EXT4_NAME_LEN)
1040 return ERR_PTR(-ENAMETOOLONG); 1041 return ERR_PTR(-ENAMETOOLONG);
1041 1042
1042 bh = ext4_find_entry(dentry, &de); 1043 bh = ext4_find_entry(dir, &dentry->d_name, &de);
1043 inode = NULL; 1044 inode = NULL;
1044 if (bh) { 1045 if (bh) {
1045 unsigned long ino = le32_to_cpu(de->inode); 1046 unsigned long ino = le32_to_cpu(de->inode);
1046 brelse (bh); 1047 brelse(bh);
1047 if (!ext4_valid_inum(dir->i_sb, ino)) { 1048 if (!ext4_valid_inum(dir->i_sb, ino)) {
1048 ext4_error(dir->i_sb, "ext4_lookup", 1049 ext4_error(dir->i_sb, "ext4_lookup",
1049 "bad inode number: %lu", ino); 1050 "bad inode number: %lu", ino);
@@ -1062,15 +1063,14 @@ struct dentry *ext4_get_parent(struct dentry *child)
1062 unsigned long ino; 1063 unsigned long ino;
1063 struct dentry *parent; 1064 struct dentry *parent;
1064 struct inode *inode; 1065 struct inode *inode;
1065 struct dentry dotdot; 1066 static const struct qstr dotdot = {
1067 .name = "..",
1068 .len = 2,
1069 };
1066 struct ext4_dir_entry_2 * de; 1070 struct ext4_dir_entry_2 * de;
1067 struct buffer_head *bh; 1071 struct buffer_head *bh;
1068 1072
1069 dotdot.d_name.name = ".."; 1073 bh = ext4_find_entry(child->d_inode, &dotdot, &de);
1070 dotdot.d_name.len = 2;
1071 dotdot.d_parent = child; /* confusing, isn't it! */
1072
1073 bh = ext4_find_entry(&dotdot, &de);
1074 inode = NULL; 1074 inode = NULL;
1075 if (!bh) 1075 if (!bh)
1076 return ERR_PTR(-ENOENT); 1076 return ERR_PTR(-ENOENT);
@@ -1201,10 +1201,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1201 1201
1202 /* create map in the end of data2 block */ 1202 /* create map in the end of data2 block */
1203 map = (struct dx_map_entry *) (data2 + blocksize); 1203 map = (struct dx_map_entry *) (data2 + blocksize);
1204 count = dx_make_map ((struct ext4_dir_entry_2 *) data1, 1204 count = dx_make_map((struct ext4_dir_entry_2 *) data1,
1205 blocksize, hinfo, map); 1205 blocksize, hinfo, map);
1206 map -= count; 1206 map -= count;
1207 dx_sort_map (map, count); 1207 dx_sort_map(map, count);
1208 /* Split the existing block in the middle, size-wise */ 1208 /* Split the existing block in the middle, size-wise */
1209 size = 0; 1209 size = 0;
1210 move = 0; 1210 move = 0;
@@ -1225,7 +1225,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1225 1225
1226 /* Fancy dance to stay within two buffers */ 1226 /* Fancy dance to stay within two buffers */
1227 de2 = dx_move_dirents(data1, data2, map + split, count - split); 1227 de2 = dx_move_dirents(data1, data2, map + split, count - split);
1228 de = dx_pack_dirents(data1,blocksize); 1228 de = dx_pack_dirents(data1, blocksize);
1229 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); 1229 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
1230 de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); 1230 de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
1231 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); 1231 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
@@ -1237,15 +1237,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1237 swap(*bh, bh2); 1237 swap(*bh, bh2);
1238 de = de2; 1238 de = de2;
1239 } 1239 }
1240 dx_insert_block (frame, hash2 + continued, newblock); 1240 dx_insert_block(frame, hash2 + continued, newblock);
1241 err = ext4_journal_dirty_metadata (handle, bh2); 1241 err = ext4_journal_dirty_metadata(handle, bh2);
1242 if (err) 1242 if (err)
1243 goto journal_error; 1243 goto journal_error;
1244 err = ext4_journal_dirty_metadata (handle, frame->bh); 1244 err = ext4_journal_dirty_metadata(handle, frame->bh);
1245 if (err) 1245 if (err)
1246 goto journal_error; 1246 goto journal_error;
1247 brelse (bh2); 1247 brelse(bh2);
1248 dxtrace(dx_show_index ("frame", frame->entries)); 1248 dxtrace(dx_show_index("frame", frame->entries));
1249 return de; 1249 return de;
1250 1250
1251journal_error: 1251journal_error:
@@ -1271,7 +1271,7 @@ errout:
1271 */ 1271 */
1272static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, 1272static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1273 struct inode *inode, struct ext4_dir_entry_2 *de, 1273 struct inode *inode, struct ext4_dir_entry_2 *de,
1274 struct buffer_head * bh) 1274 struct buffer_head *bh)
1275{ 1275{
1276 struct inode *dir = dentry->d_parent->d_inode; 1276 struct inode *dir = dentry->d_parent->d_inode;
1277 const char *name = dentry->d_name.name; 1277 const char *name = dentry->d_name.name;
@@ -1288,11 +1288,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1288 while ((char *) de <= top) { 1288 while ((char *) de <= top) {
1289 if (!ext4_check_dir_entry("ext4_add_entry", dir, de, 1289 if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
1290 bh, offset)) { 1290 bh, offset)) {
1291 brelse (bh); 1291 brelse(bh);
1292 return -EIO; 1292 return -EIO;
1293 } 1293 }
1294 if (ext4_match (namelen, name, de)) { 1294 if (ext4_match(namelen, name, de)) {
1295 brelse (bh); 1295 brelse(bh);
1296 return -EEXIST; 1296 return -EEXIST;
1297 } 1297 }
1298 nlen = EXT4_DIR_REC_LEN(de->name_len); 1298 nlen = EXT4_DIR_REC_LEN(de->name_len);
@@ -1329,7 +1329,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1329 } else 1329 } else
1330 de->inode = 0; 1330 de->inode = 0;
1331 de->name_len = namelen; 1331 de->name_len = namelen;
1332 memcpy (de->name, name, namelen); 1332 memcpy(de->name, name, namelen);
1333 /* 1333 /*
1334 * XXX shouldn't update any times until successful 1334 * XXX shouldn't update any times until successful
1335 * completion of syscall, but too many callers depend 1335 * completion of syscall, but too many callers depend
@@ -1377,7 +1377,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1377 struct fake_dirent *fde; 1377 struct fake_dirent *fde;
1378 1378
1379 blocksize = dir->i_sb->s_blocksize; 1379 blocksize = dir->i_sb->s_blocksize;
1380 dxtrace(printk("Creating index\n")); 1380 dxtrace(printk(KERN_DEBUG "Creating index\n"));
1381 retval = ext4_journal_get_write_access(handle, bh); 1381 retval = ext4_journal_get_write_access(handle, bh);
1382 if (retval) { 1382 if (retval) {
1383 ext4_std_error(dir->i_sb, retval); 1383 ext4_std_error(dir->i_sb, retval);
@@ -1386,7 +1386,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1386 } 1386 }
1387 root = (struct dx_root *) bh->b_data; 1387 root = (struct dx_root *) bh->b_data;
1388 1388
1389 bh2 = ext4_append (handle, dir, &block, &retval); 1389 bh2 = ext4_append(handle, dir, &block, &retval);
1390 if (!(bh2)) { 1390 if (!(bh2)) {
1391 brelse(bh); 1391 brelse(bh);
1392 return retval; 1392 return retval;
@@ -1412,9 +1412,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1412 root->info.info_length = sizeof(root->info); 1412 root->info.info_length = sizeof(root->info);
1413 root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; 1413 root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
1414 entries = root->entries; 1414 entries = root->entries;
1415 dx_set_block (entries, 1); 1415 dx_set_block(entries, 1);
1416 dx_set_count (entries, 1); 1416 dx_set_count(entries, 1);
1417 dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); 1417 dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
1418 1418
1419 /* Initialize as for dx_probe */ 1419 /* Initialize as for dx_probe */
1420 hinfo.hash_version = root->info.hash_version; 1420 hinfo.hash_version = root->info.hash_version;
@@ -1443,14 +1443,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1443 * may not sleep between calling this and putting something into 1443 * may not sleep between calling this and putting something into
1444 * the entry, as someone else might have used it while you slept. 1444 * the entry, as someone else might have used it while you slept.
1445 */ 1445 */
1446static int ext4_add_entry (handle_t *handle, struct dentry *dentry, 1446static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1447 struct inode *inode) 1447 struct inode *inode)
1448{ 1448{
1449 struct inode *dir = dentry->d_parent->d_inode; 1449 struct inode *dir = dentry->d_parent->d_inode;
1450 unsigned long offset; 1450 unsigned long offset;
1451 struct buffer_head * bh; 1451 struct buffer_head *bh;
1452 struct ext4_dir_entry_2 *de; 1452 struct ext4_dir_entry_2 *de;
1453 struct super_block * sb; 1453 struct super_block *sb;
1454 int retval; 1454 int retval;
1455 int dx_fallback=0; 1455 int dx_fallback=0;
1456 unsigned blocksize; 1456 unsigned blocksize;
@@ -1500,13 +1500,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1500 struct dx_frame frames[2], *frame; 1500 struct dx_frame frames[2], *frame;
1501 struct dx_entry *entries, *at; 1501 struct dx_entry *entries, *at;
1502 struct dx_hash_info hinfo; 1502 struct dx_hash_info hinfo;
1503 struct buffer_head * bh; 1503 struct buffer_head *bh;
1504 struct inode *dir = dentry->d_parent->d_inode; 1504 struct inode *dir = dentry->d_parent->d_inode;
1505 struct super_block * sb = dir->i_sb; 1505 struct super_block *sb = dir->i_sb;
1506 struct ext4_dir_entry_2 *de; 1506 struct ext4_dir_entry_2 *de;
1507 int err; 1507 int err;
1508 1508
1509 frame = dx_probe(dentry, NULL, &hinfo, frames, &err); 1509 frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
1510 if (!frame) 1510 if (!frame)
1511 return err; 1511 return err;
1512 entries = frame->entries; 1512 entries = frame->entries;
@@ -1527,7 +1527,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1527 } 1527 }
1528 1528
1529 /* Block full, should compress but for now just split */ 1529 /* Block full, should compress but for now just split */
1530 dxtrace(printk("using %u of %u node entries\n", 1530 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
1531 dx_get_count(entries), dx_get_limit(entries))); 1531 dx_get_count(entries), dx_get_limit(entries)));
1532 /* Need to split index? */ 1532 /* Need to split index? */
1533 if (dx_get_count(entries) == dx_get_limit(entries)) { 1533 if (dx_get_count(entries) == dx_get_limit(entries)) {
@@ -1559,7 +1559,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1559 if (levels) { 1559 if (levels) {
1560 unsigned icount1 = icount/2, icount2 = icount - icount1; 1560 unsigned icount1 = icount/2, icount2 = icount - icount1;
1561 unsigned hash2 = dx_get_hash(entries + icount1); 1561 unsigned hash2 = dx_get_hash(entries + icount1);
1562 dxtrace(printk("Split index %i/%i\n", icount1, icount2)); 1562 dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
1563 icount1, icount2));
1563 1564
1564 BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ 1565 BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
1565 err = ext4_journal_get_write_access(handle, 1566 err = ext4_journal_get_write_access(handle,
@@ -1567,11 +1568,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1567 if (err) 1568 if (err)
1568 goto journal_error; 1569 goto journal_error;
1569 1570
1570 memcpy ((char *) entries2, (char *) (entries + icount1), 1571 memcpy((char *) entries2, (char *) (entries + icount1),
1571 icount2 * sizeof(struct dx_entry)); 1572 icount2 * sizeof(struct dx_entry));
1572 dx_set_count (entries, icount1); 1573 dx_set_count(entries, icount1);
1573 dx_set_count (entries2, icount2); 1574 dx_set_count(entries2, icount2);
1574 dx_set_limit (entries2, dx_node_limit(dir)); 1575 dx_set_limit(entries2, dx_node_limit(dir));
1575 1576
1576 /* Which index block gets the new entry? */ 1577 /* Which index block gets the new entry? */
1577 if (at - entries >= icount1) { 1578 if (at - entries >= icount1) {
@@ -1579,16 +1580,17 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1579 frame->entries = entries = entries2; 1580 frame->entries = entries = entries2;
1580 swap(frame->bh, bh2); 1581 swap(frame->bh, bh2);
1581 } 1582 }
1582 dx_insert_block (frames + 0, hash2, newblock); 1583 dx_insert_block(frames + 0, hash2, newblock);
1583 dxtrace(dx_show_index ("node", frames[1].entries)); 1584 dxtrace(dx_show_index("node", frames[1].entries));
1584 dxtrace(dx_show_index ("node", 1585 dxtrace(dx_show_index("node",
1585 ((struct dx_node *) bh2->b_data)->entries)); 1586 ((struct dx_node *) bh2->b_data)->entries));
1586 err = ext4_journal_dirty_metadata(handle, bh2); 1587 err = ext4_journal_dirty_metadata(handle, bh2);
1587 if (err) 1588 if (err)
1588 goto journal_error; 1589 goto journal_error;
1589 brelse (bh2); 1590 brelse (bh2);
1590 } else { 1591 } else {
1591 dxtrace(printk("Creating second level index...\n")); 1592 dxtrace(printk(KERN_DEBUG
1593 "Creating second level index...\n"));
1592 memcpy((char *) entries2, (char *) entries, 1594 memcpy((char *) entries2, (char *) entries,
1593 icount * sizeof(struct dx_entry)); 1595 icount * sizeof(struct dx_entry));
1594 dx_set_limit(entries2, dx_node_limit(dir)); 1596 dx_set_limit(entries2, dx_node_limit(dir));
@@ -1630,12 +1632,12 @@ cleanup:
1630 * ext4_delete_entry deletes a directory entry by merging it with the 1632 * ext4_delete_entry deletes a directory entry by merging it with the
1631 * previous entry 1633 * previous entry
1632 */ 1634 */
1633static int ext4_delete_entry (handle_t *handle, 1635static int ext4_delete_entry(handle_t *handle,
1634 struct inode * dir, 1636 struct inode *dir,
1635 struct ext4_dir_entry_2 * de_del, 1637 struct ext4_dir_entry_2 *de_del,
1636 struct buffer_head * bh) 1638 struct buffer_head *bh)
1637{ 1639{
1638 struct ext4_dir_entry_2 * de, * pde; 1640 struct ext4_dir_entry_2 *de, *pde;
1639 int i; 1641 int i;
1640 1642
1641 i = 0; 1643 i = 0;
@@ -1716,11 +1718,11 @@ static int ext4_add_nondir(handle_t *handle,
1716 * If the create succeeds, we fill in the inode information 1718 * If the create succeeds, we fill in the inode information
1717 * with d_instantiate(). 1719 * with d_instantiate().
1718 */ 1720 */
1719static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, 1721static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
1720 struct nameidata *nd) 1722 struct nameidata *nd)
1721{ 1723{
1722 handle_t *handle; 1724 handle_t *handle;
1723 struct inode * inode; 1725 struct inode *inode;
1724 int err, retries = 0; 1726 int err, retries = 0;
1725 1727
1726retry: 1728retry:
@@ -1747,8 +1749,8 @@ retry:
1747 return err; 1749 return err;
1748} 1750}
1749 1751
1750static int ext4_mknod (struct inode * dir, struct dentry *dentry, 1752static int ext4_mknod(struct inode *dir, struct dentry *dentry,
1751 int mode, dev_t rdev) 1753 int mode, dev_t rdev)
1752{ 1754{
1753 handle_t *handle; 1755 handle_t *handle;
1754 struct inode *inode; 1756 struct inode *inode;
@@ -1767,11 +1769,11 @@ retry:
1767 if (IS_DIRSYNC(dir)) 1769 if (IS_DIRSYNC(dir))
1768 handle->h_sync = 1; 1770 handle->h_sync = 1;
1769 1771
1770 inode = ext4_new_inode (handle, dir, mode); 1772 inode = ext4_new_inode(handle, dir, mode);
1771 err = PTR_ERR(inode); 1773 err = PTR_ERR(inode);
1772 if (!IS_ERR(inode)) { 1774 if (!IS_ERR(inode)) {
1773 init_special_inode(inode, inode->i_mode, rdev); 1775 init_special_inode(inode, inode->i_mode, rdev);
1774#ifdef CONFIG_EXT4DEV_FS_XATTR 1776#ifdef CONFIG_EXT4_FS_XATTR
1775 inode->i_op = &ext4_special_inode_operations; 1777 inode->i_op = &ext4_special_inode_operations;
1776#endif 1778#endif
1777 err = ext4_add_nondir(handle, dentry, inode); 1779 err = ext4_add_nondir(handle, dentry, inode);
@@ -1782,12 +1784,12 @@ retry:
1782 return err; 1784 return err;
1783} 1785}
1784 1786
1785static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) 1787static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1786{ 1788{
1787 handle_t *handle; 1789 handle_t *handle;
1788 struct inode * inode; 1790 struct inode *inode;
1789 struct buffer_head * dir_block; 1791 struct buffer_head *dir_block;
1790 struct ext4_dir_entry_2 * de; 1792 struct ext4_dir_entry_2 *de;
1791 int err, retries = 0; 1793 int err, retries = 0;
1792 1794
1793 if (EXT4_DIR_LINK_MAX(dir)) 1795 if (EXT4_DIR_LINK_MAX(dir))
@@ -1803,7 +1805,7 @@ retry:
1803 if (IS_DIRSYNC(dir)) 1805 if (IS_DIRSYNC(dir))
1804 handle->h_sync = 1; 1806 handle->h_sync = 1;
1805 1807
1806 inode = ext4_new_inode (handle, dir, S_IFDIR | mode); 1808 inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
1807 err = PTR_ERR(inode); 1809 err = PTR_ERR(inode);
1808 if (IS_ERR(inode)) 1810 if (IS_ERR(inode))
1809 goto out_stop; 1811 goto out_stop;
@@ -1811,7 +1813,7 @@ retry:
1811 inode->i_op = &ext4_dir_inode_operations; 1813 inode->i_op = &ext4_dir_inode_operations;
1812 inode->i_fop = &ext4_dir_operations; 1814 inode->i_fop = &ext4_dir_operations;
1813 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; 1815 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
1814 dir_block = ext4_bread (handle, inode, 0, 1, &err); 1816 dir_block = ext4_bread(handle, inode, 0, 1, &err);
1815 if (!dir_block) 1817 if (!dir_block)
1816 goto out_clear_inode; 1818 goto out_clear_inode;
1817 BUFFER_TRACE(dir_block, "get_write_access"); 1819 BUFFER_TRACE(dir_block, "get_write_access");
@@ -1820,26 +1822,26 @@ retry:
1820 de->inode = cpu_to_le32(inode->i_ino); 1822 de->inode = cpu_to_le32(inode->i_ino);
1821 de->name_len = 1; 1823 de->name_len = 1;
1822 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); 1824 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
1823 strcpy (de->name, "."); 1825 strcpy(de->name, ".");
1824 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1826 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1825 de = ext4_next_entry(de); 1827 de = ext4_next_entry(de);
1826 de->inode = cpu_to_le32(dir->i_ino); 1828 de->inode = cpu_to_le32(dir->i_ino);
1827 de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - 1829 de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize -
1828 EXT4_DIR_REC_LEN(1)); 1830 EXT4_DIR_REC_LEN(1));
1829 de->name_len = 2; 1831 de->name_len = 2;
1830 strcpy (de->name, ".."); 1832 strcpy(de->name, "..");
1831 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1833 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1832 inode->i_nlink = 2; 1834 inode->i_nlink = 2;
1833 BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); 1835 BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
1834 ext4_journal_dirty_metadata(handle, dir_block); 1836 ext4_journal_dirty_metadata(handle, dir_block);
1835 brelse (dir_block); 1837 brelse(dir_block);
1836 ext4_mark_inode_dirty(handle, inode); 1838 ext4_mark_inode_dirty(handle, inode);
1837 err = ext4_add_entry (handle, dentry, inode); 1839 err = ext4_add_entry(handle, dentry, inode);
1838 if (err) { 1840 if (err) {
1839out_clear_inode: 1841out_clear_inode:
1840 clear_nlink(inode); 1842 clear_nlink(inode);
1841 ext4_mark_inode_dirty(handle, inode); 1843 ext4_mark_inode_dirty(handle, inode);
1842 iput (inode); 1844 iput(inode);
1843 goto out_stop; 1845 goto out_stop;
1844 } 1846 }
1845 ext4_inc_count(handle, dir); 1847 ext4_inc_count(handle, dir);
@@ -1856,17 +1858,17 @@ out_stop:
1856/* 1858/*
1857 * routine to check that the specified directory is empty (for rmdir) 1859 * routine to check that the specified directory is empty (for rmdir)
1858 */ 1860 */
1859static int empty_dir (struct inode * inode) 1861static int empty_dir(struct inode *inode)
1860{ 1862{
1861 unsigned long offset; 1863 unsigned long offset;
1862 struct buffer_head * bh; 1864 struct buffer_head *bh;
1863 struct ext4_dir_entry_2 * de, * de1; 1865 struct ext4_dir_entry_2 *de, *de1;
1864 struct super_block * sb; 1866 struct super_block *sb;
1865 int err = 0; 1867 int err = 0;
1866 1868
1867 sb = inode->i_sb; 1869 sb = inode->i_sb;
1868 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 1870 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
1869 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { 1871 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
1870 if (err) 1872 if (err)
1871 ext4_error(inode->i_sb, __func__, 1873 ext4_error(inode->i_sb, __func__,
1872 "error %d reading directory #%lu offset 0", 1874 "error %d reading directory #%lu offset 0",
@@ -1881,23 +1883,23 @@ static int empty_dir (struct inode * inode)
1881 de1 = ext4_next_entry(de); 1883 de1 = ext4_next_entry(de);
1882 if (le32_to_cpu(de->inode) != inode->i_ino || 1884 if (le32_to_cpu(de->inode) != inode->i_ino ||
1883 !le32_to_cpu(de1->inode) || 1885 !le32_to_cpu(de1->inode) ||
1884 strcmp (".", de->name) || 1886 strcmp(".", de->name) ||
1885 strcmp ("..", de1->name)) { 1887 strcmp("..", de1->name)) {
1886 ext4_warning (inode->i_sb, "empty_dir", 1888 ext4_warning(inode->i_sb, "empty_dir",
1887 "bad directory (dir #%lu) - no `.' or `..'", 1889 "bad directory (dir #%lu) - no `.' or `..'",
1888 inode->i_ino); 1890 inode->i_ino);
1889 brelse (bh); 1891 brelse(bh);
1890 return 1; 1892 return 1;
1891 } 1893 }
1892 offset = ext4_rec_len_from_disk(de->rec_len) + 1894 offset = ext4_rec_len_from_disk(de->rec_len) +
1893 ext4_rec_len_from_disk(de1->rec_len); 1895 ext4_rec_len_from_disk(de1->rec_len);
1894 de = ext4_next_entry(de1); 1896 de = ext4_next_entry(de1);
1895 while (offset < inode->i_size ) { 1897 while (offset < inode->i_size) {
1896 if (!bh || 1898 if (!bh ||
1897 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { 1899 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
1898 err = 0; 1900 err = 0;
1899 brelse (bh); 1901 brelse(bh);
1900 bh = ext4_bread (NULL, inode, 1902 bh = ext4_bread(NULL, inode,
1901 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); 1903 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
1902 if (!bh) { 1904 if (!bh) {
1903 if (err) 1905 if (err)
@@ -1917,13 +1919,13 @@ static int empty_dir (struct inode * inode)
1917 continue; 1919 continue;
1918 } 1920 }
1919 if (le32_to_cpu(de->inode)) { 1921 if (le32_to_cpu(de->inode)) {
1920 brelse (bh); 1922 brelse(bh);
1921 return 0; 1923 return 0;
1922 } 1924 }
1923 offset += ext4_rec_len_from_disk(de->rec_len); 1925 offset += ext4_rec_len_from_disk(de->rec_len);
1924 de = ext4_next_entry(de); 1926 de = ext4_next_entry(de);
1925 } 1927 }
1926 brelse (bh); 1928 brelse(bh);
1927 return 1; 1929 return 1;
1928} 1930}
1929 1931
@@ -1954,8 +1956,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
1954 * ->i_nlink. For, say it, character device. Not a regular file, 1956 * ->i_nlink. For, say it, character device. Not a regular file,
1955 * not a directory, not a symlink and ->i_nlink > 0. 1957 * not a directory, not a symlink and ->i_nlink > 0.
1956 */ 1958 */
1957 J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1959 J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1958 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); 1960 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
1959 1961
1960 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); 1962 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
1961 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 1963 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
@@ -2069,12 +2071,12 @@ out_brelse:
2069 goto out_err; 2071 goto out_err;
2070} 2072}
2071 2073
2072static int ext4_rmdir (struct inode * dir, struct dentry *dentry) 2074static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2073{ 2075{
2074 int retval; 2076 int retval;
2075 struct inode * inode; 2077 struct inode *inode;
2076 struct buffer_head * bh; 2078 struct buffer_head *bh;
2077 struct ext4_dir_entry_2 * de; 2079 struct ext4_dir_entry_2 *de;
2078 handle_t *handle; 2080 handle_t *handle;
2079 2081
2080 /* Initialize quotas before so that eventual writes go in 2082 /* Initialize quotas before so that eventual writes go in
@@ -2085,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
2085 return PTR_ERR(handle); 2087 return PTR_ERR(handle);
2086 2088
2087 retval = -ENOENT; 2089 retval = -ENOENT;
2088 bh = ext4_find_entry (dentry, &de); 2090 bh = ext4_find_entry(dir, &dentry->d_name, &de);
2089 if (!bh) 2091 if (!bh)
2090 goto end_rmdir; 2092 goto end_rmdir;
2091 2093
@@ -2099,16 +2101,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
2099 goto end_rmdir; 2101 goto end_rmdir;
2100 2102
2101 retval = -ENOTEMPTY; 2103 retval = -ENOTEMPTY;
2102 if (!empty_dir (inode)) 2104 if (!empty_dir(inode))
2103 goto end_rmdir; 2105 goto end_rmdir;
2104 2106
2105 retval = ext4_delete_entry(handle, dir, de, bh); 2107 retval = ext4_delete_entry(handle, dir, de, bh);
2106 if (retval) 2108 if (retval)
2107 goto end_rmdir; 2109 goto end_rmdir;
2108 if (!EXT4_DIR_LINK_EMPTY(inode)) 2110 if (!EXT4_DIR_LINK_EMPTY(inode))
2109 ext4_warning (inode->i_sb, "ext4_rmdir", 2111 ext4_warning(inode->i_sb, "ext4_rmdir",
2110 "empty directory has too many links (%d)", 2112 "empty directory has too many links (%d)",
2111 inode->i_nlink); 2113 inode->i_nlink);
2112 inode->i_version++; 2114 inode->i_version++;
2113 clear_nlink(inode); 2115 clear_nlink(inode);
2114 /* There's no need to set i_disksize: the fact that i_nlink is 2116 /* There's no need to set i_disksize: the fact that i_nlink is
@@ -2124,16 +2126,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
2124 2126
2125end_rmdir: 2127end_rmdir:
2126 ext4_journal_stop(handle); 2128 ext4_journal_stop(handle);
2127 brelse (bh); 2129 brelse(bh);
2128 return retval; 2130 return retval;
2129} 2131}
2130 2132
2131static int ext4_unlink(struct inode * dir, struct dentry *dentry) 2133static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2132{ 2134{
2133 int retval; 2135 int retval;
2134 struct inode * inode; 2136 struct inode *inode;
2135 struct buffer_head * bh; 2137 struct buffer_head *bh;
2136 struct ext4_dir_entry_2 * de; 2138 struct ext4_dir_entry_2 *de;
2137 handle_t *handle; 2139 handle_t *handle;
2138 2140
2139 /* Initialize quotas before so that eventual writes go 2141 /* Initialize quotas before so that eventual writes go
@@ -2147,7 +2149,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
2147 handle->h_sync = 1; 2149 handle->h_sync = 1;
2148 2150
2149 retval = -ENOENT; 2151 retval = -ENOENT;
2150 bh = ext4_find_entry (dentry, &de); 2152 bh = ext4_find_entry(dir, &dentry->d_name, &de);
2151 if (!bh) 2153 if (!bh)
2152 goto end_unlink; 2154 goto end_unlink;
2153 2155
@@ -2158,9 +2160,9 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
2158 goto end_unlink; 2160 goto end_unlink;
2159 2161
2160 if (!inode->i_nlink) { 2162 if (!inode->i_nlink) {
2161 ext4_warning (inode->i_sb, "ext4_unlink", 2163 ext4_warning(inode->i_sb, "ext4_unlink",
2162 "Deleting nonexistent file (%lu), %d", 2164 "Deleting nonexistent file (%lu), %d",
2163 inode->i_ino, inode->i_nlink); 2165 inode->i_ino, inode->i_nlink);
2164 inode->i_nlink = 1; 2166 inode->i_nlink = 1;
2165 } 2167 }
2166 retval = ext4_delete_entry(handle, dir, de, bh); 2168 retval = ext4_delete_entry(handle, dir, de, bh);
@@ -2178,15 +2180,15 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
2178 2180
2179end_unlink: 2181end_unlink:
2180 ext4_journal_stop(handle); 2182 ext4_journal_stop(handle);
2181 brelse (bh); 2183 brelse(bh);
2182 return retval; 2184 return retval;
2183} 2185}
2184 2186
2185static int ext4_symlink (struct inode * dir, 2187static int ext4_symlink(struct inode *dir,
2186 struct dentry *dentry, const char * symname) 2188 struct dentry *dentry, const char *symname)
2187{ 2189{
2188 handle_t *handle; 2190 handle_t *handle;
2189 struct inode * inode; 2191 struct inode *inode;
2190 int l, err, retries = 0; 2192 int l, err, retries = 0;
2191 2193
2192 l = strlen(symname)+1; 2194 l = strlen(symname)+1;
@@ -2203,12 +2205,12 @@ retry:
2203 if (IS_DIRSYNC(dir)) 2205 if (IS_DIRSYNC(dir))
2204 handle->h_sync = 1; 2206 handle->h_sync = 1;
2205 2207
2206 inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); 2208 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
2207 err = PTR_ERR(inode); 2209 err = PTR_ERR(inode);
2208 if (IS_ERR(inode)) 2210 if (IS_ERR(inode))
2209 goto out_stop; 2211 goto out_stop;
2210 2212
2211 if (l > sizeof (EXT4_I(inode)->i_data)) { 2213 if (l > sizeof(EXT4_I(inode)->i_data)) {
2212 inode->i_op = &ext4_symlink_inode_operations; 2214 inode->i_op = &ext4_symlink_inode_operations;
2213 ext4_set_aops(inode); 2215 ext4_set_aops(inode);
2214 /* 2216 /*
@@ -2221,14 +2223,14 @@ retry:
2221 if (err) { 2223 if (err) {
2222 clear_nlink(inode); 2224 clear_nlink(inode);
2223 ext4_mark_inode_dirty(handle, inode); 2225 ext4_mark_inode_dirty(handle, inode);
2224 iput (inode); 2226 iput(inode);
2225 goto out_stop; 2227 goto out_stop;
2226 } 2228 }
2227 } else { 2229 } else {
2228 /* clear the extent format for fast symlink */ 2230 /* clear the extent format for fast symlink */
2229 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; 2231 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
2230 inode->i_op = &ext4_fast_symlink_inode_operations; 2232 inode->i_op = &ext4_fast_symlink_inode_operations;
2231 memcpy((char*)&EXT4_I(inode)->i_data,symname,l); 2233 memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
2232 inode->i_size = l-1; 2234 inode->i_size = l-1;
2233 } 2235 }
2234 EXT4_I(inode)->i_disksize = inode->i_size; 2236 EXT4_I(inode)->i_disksize = inode->i_size;
@@ -2240,8 +2242,8 @@ out_stop:
2240 return err; 2242 return err;
2241} 2243}
2242 2244
2243static int ext4_link (struct dentry * old_dentry, 2245static int ext4_link(struct dentry *old_dentry,
2244 struct inode * dir, struct dentry *dentry) 2246 struct inode *dir, struct dentry *dentry)
2245{ 2247{
2246 handle_t *handle; 2248 handle_t *handle;
2247 struct inode *inode = old_dentry->d_inode; 2249 struct inode *inode = old_dentry->d_inode;
@@ -2284,13 +2286,13 @@ retry:
2284 * Anybody can rename anything with this: the permission checks are left to the 2286 * Anybody can rename anything with this: the permission checks are left to the
2285 * higher-level routines. 2287 * higher-level routines.
2286 */ 2288 */
2287static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, 2289static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2288 struct inode * new_dir,struct dentry *new_dentry) 2290 struct inode *new_dir, struct dentry *new_dentry)
2289{ 2291{
2290 handle_t *handle; 2292 handle_t *handle;
2291 struct inode * old_inode, * new_inode; 2293 struct inode *old_inode, *new_inode;
2292 struct buffer_head * old_bh, * new_bh, * dir_bh; 2294 struct buffer_head *old_bh, *new_bh, *dir_bh;
2293 struct ext4_dir_entry_2 * old_de, * new_de; 2295 struct ext4_dir_entry_2 *old_de, *new_de;
2294 int retval; 2296 int retval;
2295 2297
2296 old_bh = new_bh = dir_bh = NULL; 2298 old_bh = new_bh = dir_bh = NULL;
@@ -2308,7 +2310,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2308 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) 2310 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
2309 handle->h_sync = 1; 2311 handle->h_sync = 1;
2310 2312
2311 old_bh = ext4_find_entry (old_dentry, &old_de); 2313 old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
2312 /* 2314 /*
2313 * Check for inode number is _not_ due to possible IO errors. 2315 * Check for inode number is _not_ due to possible IO errors.
2314 * We might rmdir the source, keep it as pwd of some process 2316 * We might rmdir the source, keep it as pwd of some process
@@ -2321,32 +2323,32 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2321 goto end_rename; 2323 goto end_rename;
2322 2324
2323 new_inode = new_dentry->d_inode; 2325 new_inode = new_dentry->d_inode;
2324 new_bh = ext4_find_entry (new_dentry, &new_de); 2326 new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de);
2325 if (new_bh) { 2327 if (new_bh) {
2326 if (!new_inode) { 2328 if (!new_inode) {
2327 brelse (new_bh); 2329 brelse(new_bh);
2328 new_bh = NULL; 2330 new_bh = NULL;
2329 } 2331 }
2330 } 2332 }
2331 if (S_ISDIR(old_inode->i_mode)) { 2333 if (S_ISDIR(old_inode->i_mode)) {
2332 if (new_inode) { 2334 if (new_inode) {
2333 retval = -ENOTEMPTY; 2335 retval = -ENOTEMPTY;
2334 if (!empty_dir (new_inode)) 2336 if (!empty_dir(new_inode))
2335 goto end_rename; 2337 goto end_rename;
2336 } 2338 }
2337 retval = -EIO; 2339 retval = -EIO;
2338 dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); 2340 dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
2339 if (!dir_bh) 2341 if (!dir_bh)
2340 goto end_rename; 2342 goto end_rename;
2341 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) 2343 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
2342 goto end_rename; 2344 goto end_rename;
2343 retval = -EMLINK; 2345 retval = -EMLINK;
2344 if (!new_inode && new_dir!=old_dir && 2346 if (!new_inode && new_dir != old_dir &&
2345 new_dir->i_nlink >= EXT4_LINK_MAX) 2347 new_dir->i_nlink >= EXT4_LINK_MAX)
2346 goto end_rename; 2348 goto end_rename;
2347 } 2349 }
2348 if (!new_bh) { 2350 if (!new_bh) {
2349 retval = ext4_add_entry (handle, new_dentry, old_inode); 2351 retval = ext4_add_entry(handle, new_dentry, old_inode);
2350 if (retval) 2352 if (retval)
2351 goto end_rename; 2353 goto end_rename;
2352 } else { 2354 } else {
@@ -2388,7 +2390,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2388 struct buffer_head *old_bh2; 2390 struct buffer_head *old_bh2;
2389 struct ext4_dir_entry_2 *old_de2; 2391 struct ext4_dir_entry_2 *old_de2;
2390 2392
2391 old_bh2 = ext4_find_entry(old_dentry, &old_de2); 2393 old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2);
2392 if (old_bh2) { 2394 if (old_bh2) {
2393 retval = ext4_delete_entry(handle, old_dir, 2395 retval = ext4_delete_entry(handle, old_dir,
2394 old_de2, old_bh2); 2396 old_de2, old_bh2);
@@ -2433,9 +2435,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2433 retval = 0; 2435 retval = 0;
2434 2436
2435end_rename: 2437end_rename:
2436 brelse (dir_bh); 2438 brelse(dir_bh);
2437 brelse (old_bh); 2439 brelse(old_bh);
2438 brelse (new_bh); 2440 brelse(new_bh);
2439 ext4_journal_stop(handle); 2441 ext4_journal_stop(handle);
2440 return retval; 2442 return retval;
2441} 2443}
@@ -2454,7 +2456,7 @@ const struct inode_operations ext4_dir_inode_operations = {
2454 .mknod = ext4_mknod, 2456 .mknod = ext4_mknod,
2455 .rename = ext4_rename, 2457 .rename = ext4_rename,
2456 .setattr = ext4_setattr, 2458 .setattr = ext4_setattr,
2457#ifdef CONFIG_EXT4DEV_FS_XATTR 2459#ifdef CONFIG_EXT4_FS_XATTR
2458 .setxattr = generic_setxattr, 2460 .setxattr = generic_setxattr,
2459 .getxattr = generic_getxattr, 2461 .getxattr = generic_getxattr,
2460 .listxattr = ext4_listxattr, 2462 .listxattr = ext4_listxattr,
@@ -2465,7 +2467,7 @@ const struct inode_operations ext4_dir_inode_operations = {
2465 2467
2466const struct inode_operations ext4_special_inode_operations = { 2468const struct inode_operations ext4_special_inode_operations = {
2467 .setattr = ext4_setattr, 2469 .setattr = ext4_setattr,
2468#ifdef CONFIG_EXT4DEV_FS_XATTR 2470#ifdef CONFIG_EXT4_FS_XATTR
2469 .setxattr = generic_setxattr, 2471 .setxattr = generic_setxattr,
2470 .getxattr = generic_getxattr, 2472 .getxattr = generic_getxattr,
2471 .listxattr = ext4_listxattr, 2473 .listxattr = ext4_listxattr,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b3d35604ea18..b6ec1843a015 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -416,8 +416,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", 416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
417 gdb_num); 417 gdb_num);
418 418
419 /* 419 /*
420 * If we are not using the primary superblock/GDT copy don't resize, 420 * If we are not using the primary superblock/GDT copy don't resize,
421 * because the user tools have no way of handling this. Probably a 421 * because the user tools have no way of handling this. Probably a
422 * bad time to do it anyways. 422 * bad time to do it anyways.
423 */ 423 */
@@ -870,11 +870,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
870 * We can allocate memory for mb_alloc based on the new group 870 * We can allocate memory for mb_alloc based on the new group
871 * descriptor 871 * descriptor
872 */ 872 */
873 if (test_opt(sb, MBALLOC)) { 873 err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
874 err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); 874 if (err)
875 if (err) 875 goto exit_journal;
876 goto exit_journal; 876
877 }
878 /* 877 /*
879 * Make the new blocks and inodes valid next. We do this before 878 * Make the new blocks and inodes valid next. We do this before
880 * increasing the group count so that once the group is enabled, 879 * increasing the group count so that once the group is enabled,
@@ -929,6 +928,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
929 percpu_counter_add(&sbi->s_freeinodes_counter, 928 percpu_counter_add(&sbi->s_freeinodes_counter,
930 EXT4_INODES_PER_GROUP(sb)); 929 EXT4_INODES_PER_GROUP(sb));
931 930
931 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
932 ext4_group_t flex_group;
933 flex_group = ext4_flex_group(sbi, input->group);
934 sbi->s_flex_groups[flex_group].free_blocks +=
935 input->free_blocks_count;
936 sbi->s_flex_groups[flex_group].free_inodes +=
937 EXT4_INODES_PER_GROUP(sb);
938 }
939
932 ext4_journal_dirty_metadata(handle, sbi->s_sbh); 940 ext4_journal_dirty_metadata(handle, sbi->s_sbh);
933 sb->s_dirt = 1; 941 sb->s_dirt = 1;
934 942
@@ -964,7 +972,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
964 ext4_group_t o_groups_count; 972 ext4_group_t o_groups_count;
965 ext4_grpblk_t last; 973 ext4_grpblk_t last;
966 ext4_grpblk_t add; 974 ext4_grpblk_t add;
967 struct buffer_head * bh; 975 struct buffer_head *bh;
968 handle_t *handle; 976 handle_t *handle;
969 int err; 977 int err;
970 unsigned long freed_blocks; 978 unsigned long freed_blocks;
@@ -1077,8 +1085,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1077 /* 1085 /*
1078 * Mark mballoc pages as not up to date so that they will be updated 1086 * Mark mballoc pages as not up to date so that they will be updated
1079 * next time they are loaded by ext4_mb_load_buddy. 1087 * next time they are loaded by ext4_mb_load_buddy.
1088 *
1089 * XXX Bad, Bad, BAD!!! We should not be overloading the
1090 * Uptodate flag, particularly on thte bitmap bh, as way of
1091 * hinting to ext4_mb_load_buddy() that it needs to be
1092 * overloaded. A user could take a LVM snapshot, then do an
1093 * on-line fsck, and clear the uptodate flag, and this would
1094 * not be a bug in userspace, but a bug in the kernel. FIXME!!!
1080 */ 1095 */
1081 if (test_opt(sb, MBALLOC)) { 1096 {
1082 struct ext4_sb_info *sbi = EXT4_SB(sb); 1097 struct ext4_sb_info *sbi = EXT4_SB(sb);
1083 struct inode *inode = sbi->s_buddy_cache; 1098 struct inode *inode = sbi->s_buddy_cache;
1084 int blocks_per_page; 1099 int blocks_per_page;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 566344b926b7..fb940c22ab0d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -34,6 +34,8 @@
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/quotaops.h> 35#include <linux/quotaops.h>
36#include <linux/seq_file.h> 36#include <linux/seq_file.h>
37#include <linux/proc_fs.h>
38#include <linux/marker.h>
37#include <linux/log2.h> 39#include <linux/log2.h>
38#include <linux/crc16.h> 40#include <linux/crc16.h>
39#include <asm/uaccess.h> 41#include <asm/uaccess.h>
@@ -45,6 +47,8 @@
45#include "namei.h" 47#include "namei.h"
46#include "group.h" 48#include "group.h"
47 49
50struct proc_dir_entry *ext4_proc_root;
51
48static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 52static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
49 unsigned long journal_devnum); 53 unsigned long journal_devnum);
50static int ext4_create_journal(struct super_block *, struct ext4_super_block *, 54static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
@@ -503,15 +507,18 @@ static void ext4_put_super(struct super_block *sb)
503 ext4_mb_release(sb); 507 ext4_mb_release(sb);
504 ext4_ext_release(sb); 508 ext4_ext_release(sb);
505 ext4_xattr_put_super(sb); 509 ext4_xattr_put_super(sb);
506 jbd2_journal_destroy(sbi->s_journal); 510 if (jbd2_journal_destroy(sbi->s_journal) < 0)
511 ext4_abort(sb, __func__, "Couldn't clean up the journal");
507 sbi->s_journal = NULL; 512 sbi->s_journal = NULL;
508 if (!(sb->s_flags & MS_RDONLY)) { 513 if (!(sb->s_flags & MS_RDONLY)) {
509 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 514 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
510 es->s_state = cpu_to_le16(sbi->s_mount_state); 515 es->s_state = cpu_to_le16(sbi->s_mount_state);
511 BUFFER_TRACE(sbi->s_sbh, "marking dirty");
512 mark_buffer_dirty(sbi->s_sbh);
513 ext4_commit_super(sb, es, 1); 516 ext4_commit_super(sb, es, 1);
514 } 517 }
518 if (sbi->s_proc) {
519 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
520 remove_proc_entry(sb->s_id, ext4_proc_root);
521 }
515 522
516 for (i = 0; i < sbi->s_gdb_count; i++) 523 for (i = 0; i < sbi->s_gdb_count; i++)
517 brelse(sbi->s_group_desc[i]); 524 brelse(sbi->s_group_desc[i]);
@@ -520,6 +527,7 @@ static void ext4_put_super(struct super_block *sb)
520 percpu_counter_destroy(&sbi->s_freeblocks_counter); 527 percpu_counter_destroy(&sbi->s_freeblocks_counter);
521 percpu_counter_destroy(&sbi->s_freeinodes_counter); 528 percpu_counter_destroy(&sbi->s_freeinodes_counter);
522 percpu_counter_destroy(&sbi->s_dirs_counter); 529 percpu_counter_destroy(&sbi->s_dirs_counter);
530 percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
523 brelse(sbi->s_sbh); 531 brelse(sbi->s_sbh);
524#ifdef CONFIG_QUOTA 532#ifdef CONFIG_QUOTA
525 for (i = 0; i < MAXQUOTAS; i++) 533 for (i = 0; i < MAXQUOTAS; i++)
@@ -562,11 +570,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
562 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 570 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
563 if (!ei) 571 if (!ei)
564 return NULL; 572 return NULL;
565#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 573#ifdef CONFIG_EXT4_FS_POSIX_ACL
566 ei->i_acl = EXT4_ACL_NOT_CACHED; 574 ei->i_acl = EXT4_ACL_NOT_CACHED;
567 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 575 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
568#endif 576#endif
569 ei->i_block_alloc_info = NULL;
570 ei->vfs_inode.i_version = 1; 577 ei->vfs_inode.i_version = 1;
571 ei->vfs_inode.i_data.writeback_index = 0; 578 ei->vfs_inode.i_data.writeback_index = 0;
572 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 579 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
@@ -599,7 +606,7 @@ static void init_once(void *foo)
599 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 606 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
600 607
601 INIT_LIST_HEAD(&ei->i_orphan); 608 INIT_LIST_HEAD(&ei->i_orphan);
602#ifdef CONFIG_EXT4DEV_FS_XATTR 609#ifdef CONFIG_EXT4_FS_XATTR
603 init_rwsem(&ei->xattr_sem); 610 init_rwsem(&ei->xattr_sem);
604#endif 611#endif
605 init_rwsem(&ei->i_data_sem); 612 init_rwsem(&ei->i_data_sem);
@@ -625,8 +632,7 @@ static void destroy_inodecache(void)
625 632
626static void ext4_clear_inode(struct inode *inode) 633static void ext4_clear_inode(struct inode *inode)
627{ 634{
628 struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; 635#ifdef CONFIG_EXT4_FS_POSIX_ACL
629#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
630 if (EXT4_I(inode)->i_acl && 636 if (EXT4_I(inode)->i_acl &&
631 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { 637 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
632 posix_acl_release(EXT4_I(inode)->i_acl); 638 posix_acl_release(EXT4_I(inode)->i_acl);
@@ -638,10 +644,7 @@ static void ext4_clear_inode(struct inode *inode)
638 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; 644 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
639 } 645 }
640#endif 646#endif
641 ext4_discard_reservation(inode); 647 ext4_discard_preallocations(inode);
642 EXT4_I(inode)->i_block_alloc_info = NULL;
643 if (unlikely(rsv))
644 kfree(rsv);
645 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 648 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
646 &EXT4_I(inode)->jinode); 649 &EXT4_I(inode)->jinode);
647} 650}
@@ -654,7 +657,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
654 657
655 if (sbi->s_jquota_fmt) 658 if (sbi->s_jquota_fmt)
656 seq_printf(seq, ",jqfmt=%s", 659 seq_printf(seq, ",jqfmt=%s",
657 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); 660 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0");
658 661
659 if (sbi->s_qf_names[USRQUOTA]) 662 if (sbi->s_qf_names[USRQUOTA])
660 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 663 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -718,7 +721,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
718 seq_puts(seq, ",debug"); 721 seq_puts(seq, ",debug");
719 if (test_opt(sb, OLDALLOC)) 722 if (test_opt(sb, OLDALLOC))
720 seq_puts(seq, ",oldalloc"); 723 seq_puts(seq, ",oldalloc");
721#ifdef CONFIG_EXT4DEV_FS_XATTR 724#ifdef CONFIG_EXT4_FS_XATTR
722 if (test_opt(sb, XATTR_USER) && 725 if (test_opt(sb, XATTR_USER) &&
723 !(def_mount_opts & EXT4_DEFM_XATTR_USER)) 726 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
724 seq_puts(seq, ",user_xattr"); 727 seq_puts(seq, ",user_xattr");
@@ -727,7 +730,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
727 seq_puts(seq, ",nouser_xattr"); 730 seq_puts(seq, ",nouser_xattr");
728 } 731 }
729#endif 732#endif
730#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 733#ifdef CONFIG_EXT4_FS_POSIX_ACL
731 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 734 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
732 seq_puts(seq, ",acl"); 735 seq_puts(seq, ",acl");
733 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 736 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
@@ -752,8 +755,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
752 seq_puts(seq, ",nobh"); 755 seq_puts(seq, ",nobh");
753 if (!test_opt(sb, EXTENTS)) 756 if (!test_opt(sb, EXTENTS))
754 seq_puts(seq, ",noextents"); 757 seq_puts(seq, ",noextents");
755 if (!test_opt(sb, MBALLOC))
756 seq_puts(seq, ",nomballoc");
757 if (test_opt(sb, I_VERSION)) 758 if (test_opt(sb, I_VERSION))
758 seq_puts(seq, ",i_version"); 759 seq_puts(seq, ",i_version");
759 if (!test_opt(sb, DELALLOC)) 760 if (!test_opt(sb, DELALLOC))
@@ -773,6 +774,13 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
773 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 774 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
774 seq_puts(seq, ",data=writeback"); 775 seq_puts(seq, ",data=writeback");
775 776
777 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
778 seq_printf(seq, ",inode_readahead_blks=%u",
779 sbi->s_inode_readahead_blks);
780
781 if (test_opt(sb, DATA_ERR_ABORT))
782 seq_puts(seq, ",data_err=abort");
783
776 ext4_show_quota_options(seq, sb); 784 ext4_show_quota_options(seq, sb);
777 return 0; 785 return 0;
778} 786}
@@ -822,7 +830,7 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
822} 830}
823 831
824#ifdef CONFIG_QUOTA 832#ifdef CONFIG_QUOTA
825#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group") 833#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
826#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 834#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
827 835
828static int ext4_dquot_initialize(struct inode *inode, int type); 836static int ext4_dquot_initialize(struct inode *inode, int type);
@@ -902,11 +910,13 @@ enum {
902 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 910 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
903 Opt_journal_checksum, Opt_journal_async_commit, 911 Opt_journal_checksum, Opt_journal_async_commit,
904 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 912 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
913 Opt_data_err_abort, Opt_data_err_ignore,
905 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 914 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
906 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 915 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
907 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 916 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
908 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, 917 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
909 Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, 918 Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
919 Opt_inode_readahead_blks
910}; 920};
911 921
912static match_table_t tokens = { 922static match_table_t tokens = {
@@ -947,6 +957,8 @@ static match_table_t tokens = {
947 {Opt_data_journal, "data=journal"}, 957 {Opt_data_journal, "data=journal"},
948 {Opt_data_ordered, "data=ordered"}, 958 {Opt_data_ordered, "data=ordered"},
949 {Opt_data_writeback, "data=writeback"}, 959 {Opt_data_writeback, "data=writeback"},
960 {Opt_data_err_abort, "data_err=abort"},
961 {Opt_data_err_ignore, "data_err=ignore"},
950 {Opt_offusrjquota, "usrjquota="}, 962 {Opt_offusrjquota, "usrjquota="},
951 {Opt_usrjquota, "usrjquota=%s"}, 963 {Opt_usrjquota, "usrjquota=%s"},
952 {Opt_offgrpjquota, "grpjquota="}, 964 {Opt_offgrpjquota, "grpjquota="},
@@ -967,6 +979,7 @@ static match_table_t tokens = {
967 {Opt_resize, "resize"}, 979 {Opt_resize, "resize"},
968 {Opt_delalloc, "delalloc"}, 980 {Opt_delalloc, "delalloc"},
969 {Opt_nodelalloc, "nodelalloc"}, 981 {Opt_nodelalloc, "nodelalloc"},
982 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
970 {Opt_err, NULL}, 983 {Opt_err, NULL},
971}; 984};
972 985
@@ -981,7 +994,7 @@ static ext4_fsblk_t get_sb_block(void **data)
981 /*todo: use simple_strtoll with >32bit ext4 */ 994 /*todo: use simple_strtoll with >32bit ext4 */
982 sb_block = simple_strtoul(options, &options, 0); 995 sb_block = simple_strtoul(options, &options, 0);
983 if (*options && *options != ',') { 996 if (*options && *options != ',') {
984 printk("EXT4-fs: Invalid sb specification: %s\n", 997 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
985 (char *) *data); 998 (char *) *data);
986 return 1; 999 return 1;
987 } 1000 }
@@ -1072,7 +1085,7 @@ static int parse_options(char *options, struct super_block *sb,
1072 case Opt_orlov: 1085 case Opt_orlov:
1073 clear_opt(sbi->s_mount_opt, OLDALLOC); 1086 clear_opt(sbi->s_mount_opt, OLDALLOC);
1074 break; 1087 break;
1075#ifdef CONFIG_EXT4DEV_FS_XATTR 1088#ifdef CONFIG_EXT4_FS_XATTR
1076 case Opt_user_xattr: 1089 case Opt_user_xattr:
1077 set_opt(sbi->s_mount_opt, XATTR_USER); 1090 set_opt(sbi->s_mount_opt, XATTR_USER);
1078 break; 1091 break;
@@ -1082,10 +1095,11 @@ static int parse_options(char *options, struct super_block *sb,
1082#else 1095#else
1083 case Opt_user_xattr: 1096 case Opt_user_xattr:
1084 case Opt_nouser_xattr: 1097 case Opt_nouser_xattr:
1085 printk("EXT4 (no)user_xattr options not supported\n"); 1098 printk(KERN_ERR "EXT4 (no)user_xattr options "
1099 "not supported\n");
1086 break; 1100 break;
1087#endif 1101#endif
1088#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 1102#ifdef CONFIG_EXT4_FS_POSIX_ACL
1089 case Opt_acl: 1103 case Opt_acl:
1090 set_opt(sbi->s_mount_opt, POSIX_ACL); 1104 set_opt(sbi->s_mount_opt, POSIX_ACL);
1091 break; 1105 break;
@@ -1095,7 +1109,8 @@ static int parse_options(char *options, struct super_block *sb,
1095#else 1109#else
1096 case Opt_acl: 1110 case Opt_acl:
1097 case Opt_noacl: 1111 case Opt_noacl:
1098 printk("EXT4 (no)acl options not supported\n"); 1112 printk(KERN_ERR "EXT4 (no)acl options "
1113 "not supported\n");
1099 break; 1114 break;
1100#endif 1115#endif
1101 case Opt_reservation: 1116 case Opt_reservation:
@@ -1178,6 +1193,12 @@ static int parse_options(char *options, struct super_block *sb,
1178 sbi->s_mount_opt |= data_opt; 1193 sbi->s_mount_opt |= data_opt;
1179 } 1194 }
1180 break; 1195 break;
1196 case Opt_data_err_abort:
1197 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1198 break;
1199 case Opt_data_err_ignore:
1200 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1201 break;
1181#ifdef CONFIG_QUOTA 1202#ifdef CONFIG_QUOTA
1182 case Opt_usrjquota: 1203 case Opt_usrjquota:
1183 qtype = USRQUOTA; 1204 qtype = USRQUOTA;
@@ -1189,8 +1210,8 @@ set_qf_name:
1189 sb_any_quota_suspended(sb)) && 1210 sb_any_quota_suspended(sb)) &&
1190 !sbi->s_qf_names[qtype]) { 1211 !sbi->s_qf_names[qtype]) {
1191 printk(KERN_ERR 1212 printk(KERN_ERR
1192 "EXT4-fs: Cannot change journaled " 1213 "EXT4-fs: Cannot change journaled "
1193 "quota options when quota turned on.\n"); 1214 "quota options when quota turned on.\n");
1194 return 0; 1215 return 0;
1195 } 1216 }
1196 qname = match_strdup(&args[0]); 1217 qname = match_strdup(&args[0]);
@@ -1357,12 +1378,6 @@ set_qf_format:
1357 case Opt_nodelalloc: 1378 case Opt_nodelalloc:
1358 clear_opt(sbi->s_mount_opt, DELALLOC); 1379 clear_opt(sbi->s_mount_opt, DELALLOC);
1359 break; 1380 break;
1360 case Opt_mballoc:
1361 set_opt(sbi->s_mount_opt, MBALLOC);
1362 break;
1363 case Opt_nomballoc:
1364 clear_opt(sbi->s_mount_opt, MBALLOC);
1365 break;
1366 case Opt_stripe: 1381 case Opt_stripe:
1367 if (match_int(&args[0], &option)) 1382 if (match_int(&args[0], &option))
1368 return 0; 1383 return 0;
@@ -1373,6 +1388,13 @@ set_qf_format:
1373 case Opt_delalloc: 1388 case Opt_delalloc:
1374 set_opt(sbi->s_mount_opt, DELALLOC); 1389 set_opt(sbi->s_mount_opt, DELALLOC);
1375 break; 1390 break;
1391 case Opt_inode_readahead_blks:
1392 if (match_int(&args[0], &option))
1393 return 0;
1394 if (option < 0 || option > (1 << 30))
1395 return 0;
1396 sbi->s_inode_readahead_blks = option;
1397 break;
1376 default: 1398 default:
1377 printk(KERN_ERR 1399 printk(KERN_ERR
1378 "EXT4-fs: Unrecognized mount option \"%s\" " 1400 "EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1473,15 +1495,9 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1473 EXT4_INODES_PER_GROUP(sb), 1495 EXT4_INODES_PER_GROUP(sb),
1474 sbi->s_mount_opt); 1496 sbi->s_mount_opt);
1475 1497
1476 printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); 1498 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
1477 if (EXT4_SB(sb)->s_journal->j_inode == NULL) { 1499 sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1478 char b[BDEVNAME_SIZE]; 1500 "external", EXT4_SB(sb)->s_journal->j_devname);
1479
1480 printk("external journal on %s\n",
1481 bdevname(EXT4_SB(sb)->s_journal->j_dev, b));
1482 } else {
1483 printk("internal journal\n");
1484 }
1485 return res; 1501 return res;
1486} 1502}
1487 1503
@@ -1504,8 +1520,11 @@ static int ext4_fill_flex_info(struct super_block *sb)
1504 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1520 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1505 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1521 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1506 1522
1507 flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / 1523 /* We allocate both existing and potentially added groups */
1508 groups_per_flex; 1524 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1525 ((sbi->s_es->s_reserved_gdt_blocks +1 ) <<
1526 EXT4_DESC_PER_BLOCK_BITS(sb))) /
1527 groups_per_flex;
1509 sbi->s_flex_groups = kzalloc(flex_group_count * 1528 sbi->s_flex_groups = kzalloc(flex_group_count *
1510 sizeof(struct flex_groups), GFP_KERNEL); 1529 sizeof(struct flex_groups), GFP_KERNEL);
1511 if (sbi->s_flex_groups == NULL) { 1530 if (sbi->s_flex_groups == NULL) {
@@ -1584,7 +1603,7 @@ static int ext4_check_descriptors(struct super_block *sb)
1584 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1603 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1585 flexbg_flag = 1; 1604 flexbg_flag = 1;
1586 1605
1587 ext4_debug ("Checking group descriptors"); 1606 ext4_debug("Checking group descriptors");
1588 1607
1589 for (i = 0; i < sbi->s_groups_count; i++) { 1608 for (i = 0; i < sbi->s_groups_count; i++) {
1590 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1609 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
@@ -1623,8 +1642,10 @@ static int ext4_check_descriptors(struct super_block *sb)
1623 "Checksum for group %lu failed (%u!=%u)\n", 1642 "Checksum for group %lu failed (%u!=%u)\n",
1624 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1643 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1625 gdp)), le16_to_cpu(gdp->bg_checksum)); 1644 gdp)), le16_to_cpu(gdp->bg_checksum));
1626 if (!(sb->s_flags & MS_RDONLY)) 1645 if (!(sb->s_flags & MS_RDONLY)) {
1646 spin_unlock(sb_bgl_lock(sbi, i));
1627 return 0; 1647 return 0;
1648 }
1628 } 1649 }
1629 spin_unlock(sb_bgl_lock(sbi, i)); 1650 spin_unlock(sb_bgl_lock(sbi, i));
1630 if (!flexbg_flag) 1651 if (!flexbg_flag)
@@ -1714,9 +1735,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1714 DQUOT_INIT(inode); 1735 DQUOT_INIT(inode);
1715 if (inode->i_nlink) { 1736 if (inode->i_nlink) {
1716 printk(KERN_DEBUG 1737 printk(KERN_DEBUG
1717 "%s: truncating inode %lu to %Ld bytes\n", 1738 "%s: truncating inode %lu to %lld bytes\n",
1718 __func__, inode->i_ino, inode->i_size); 1739 __func__, inode->i_ino, inode->i_size);
1719 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1740 jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1720 inode->i_ino, inode->i_size); 1741 inode->i_ino, inode->i_size);
1721 ext4_truncate(inode); 1742 ext4_truncate(inode);
1722 nr_truncates++; 1743 nr_truncates++;
@@ -1914,6 +1935,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1914 unsigned long journal_devnum = 0; 1935 unsigned long journal_devnum = 0;
1915 unsigned long def_mount_opts; 1936 unsigned long def_mount_opts;
1916 struct inode *root; 1937 struct inode *root;
1938 char *cp;
1917 int ret = -EINVAL; 1939 int ret = -EINVAL;
1918 int blocksize; 1940 int blocksize;
1919 int db_count; 1941 int db_count;
@@ -1930,10 +1952,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1930 sbi->s_mount_opt = 0; 1952 sbi->s_mount_opt = 0;
1931 sbi->s_resuid = EXT4_DEF_RESUID; 1953 sbi->s_resuid = EXT4_DEF_RESUID;
1932 sbi->s_resgid = EXT4_DEF_RESGID; 1954 sbi->s_resgid = EXT4_DEF_RESGID;
1955 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
1933 sbi->s_sb_block = sb_block; 1956 sbi->s_sb_block = sb_block;
1934 1957
1935 unlock_kernel(); 1958 unlock_kernel();
1936 1959
1960 /* Cleanup superblock name */
1961 for (cp = sb->s_id; (cp = strchr(cp, '/'));)
1962 *cp = '!';
1963
1937 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 1964 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
1938 if (!blocksize) { 1965 if (!blocksize) {
1939 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); 1966 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
@@ -1973,11 +2000,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1973 set_opt(sbi->s_mount_opt, GRPID); 2000 set_opt(sbi->s_mount_opt, GRPID);
1974 if (def_mount_opts & EXT4_DEFM_UID16) 2001 if (def_mount_opts & EXT4_DEFM_UID16)
1975 set_opt(sbi->s_mount_opt, NO_UID32); 2002 set_opt(sbi->s_mount_opt, NO_UID32);
1976#ifdef CONFIG_EXT4DEV_FS_XATTR 2003#ifdef CONFIG_EXT4_FS_XATTR
1977 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 2004 if (def_mount_opts & EXT4_DEFM_XATTR_USER)
1978 set_opt(sbi->s_mount_opt, XATTR_USER); 2005 set_opt(sbi->s_mount_opt, XATTR_USER);
1979#endif 2006#endif
1980#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 2007#ifdef CONFIG_EXT4_FS_POSIX_ACL
1981 if (def_mount_opts & EXT4_DEFM_ACL) 2008 if (def_mount_opts & EXT4_DEFM_ACL)
1982 set_opt(sbi->s_mount_opt, POSIX_ACL); 2009 set_opt(sbi->s_mount_opt, POSIX_ACL);
1983#endif 2010#endif
@@ -2012,11 +2039,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2012 ext4_warning(sb, __func__, 2039 ext4_warning(sb, __func__,
2013 "extents feature not enabled on this filesystem, " 2040 "extents feature not enabled on this filesystem, "
2014 "use tune2fs.\n"); 2041 "use tune2fs.\n");
2015 /*
2016 * turn on mballoc code by default in ext4 filesystem
2017 * Use -o nomballoc to turn it off
2018 */
2019 set_opt(sbi->s_mount_opt, MBALLOC);
2020 2042
2021 /* 2043 /*
2022 * enable delayed allocation by default 2044 * enable delayed allocation by default
@@ -2041,16 +2063,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2041 "running e2fsck is recommended\n"); 2063 "running e2fsck is recommended\n");
2042 2064
2043 /* 2065 /*
2044 * Since ext4 is still considered development code, we require
2045 * that the TEST_FILESYS flag in s->flags be set.
2046 */
2047 if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) {
2048 printk(KERN_WARNING "EXT4-fs: %s: not marked "
2049 "OK to use with test code.\n", sb->s_id);
2050 goto failed_mount;
2051 }
2052
2053 /*
2054 * Check feature flags regardless of the revision level, since we 2066 * Check feature flags regardless of the revision level, since we
2055 * previously didn't change the revision level when setting the flags, 2067 * previously didn't change the revision level when setting the flags,
2056 * so there is a chance incompat flags are set on a rev 0 filesystem. 2068 * so there is a chance incompat flags are set on a rev 0 filesystem.
@@ -2219,6 +2231,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2219 goto failed_mount; 2231 goto failed_mount;
2220 } 2232 }
2221 2233
2234#ifdef CONFIG_PROC_FS
2235 if (ext4_proc_root)
2236 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2237
2238 if (sbi->s_proc)
2239 proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
2240 &ext4_ui_proc_fops,
2241 &sbi->s_inode_readahead_blks);
2242#endif
2243
2222 bgl_lock_init(&sbi->s_blockgroup_lock); 2244 bgl_lock_init(&sbi->s_blockgroup_lock);
2223 2245
2224 for (i = 0; i < db_count; i++) { 2246 for (i = 0; i < db_count; i++) {
@@ -2257,24 +2279,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2257 err = percpu_counter_init(&sbi->s_dirs_counter, 2279 err = percpu_counter_init(&sbi->s_dirs_counter,
2258 ext4_count_dirs(sb)); 2280 ext4_count_dirs(sb));
2259 } 2281 }
2282 if (!err) {
2283 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2284 }
2260 if (err) { 2285 if (err) {
2261 printk(KERN_ERR "EXT4-fs: insufficient memory\n"); 2286 printk(KERN_ERR "EXT4-fs: insufficient memory\n");
2262 goto failed_mount3; 2287 goto failed_mount3;
2263 } 2288 }
2264 2289
2265 /* per fileystem reservation list head & lock */
2266 spin_lock_init(&sbi->s_rsv_window_lock);
2267 sbi->s_rsv_window_root = RB_ROOT;
2268 /* Add a single, static dummy reservation to the start of the
2269 * reservation window list --- it gives us a placeholder for
2270 * append-at-start-of-list which makes the allocation logic
2271 * _much_ simpler. */
2272 sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
2273 sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
2274 sbi->s_rsv_window_head.rsv_alloc_hit = 0;
2275 sbi->s_rsv_window_head.rsv_goal_size = 0;
2276 ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
2277
2278 sbi->s_stripe = ext4_get_stripe_size(sbi); 2290 sbi->s_stripe = ext4_get_stripe_size(sbi);
2279 2291
2280 /* 2292 /*
@@ -2471,7 +2483,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2471 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); 2483 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
2472 2484
2473 ext4_ext_init(sb); 2485 ext4_ext_init(sb);
2474 ext4_mb_init(sb, needs_recovery); 2486 err = ext4_mb_init(sb, needs_recovery);
2487 if (err) {
2488 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
2489 err);
2490 goto failed_mount4;
2491 }
2475 2492
2476 lock_kernel(); 2493 lock_kernel();
2477 return 0; 2494 return 0;
@@ -2489,11 +2506,16 @@ failed_mount3:
2489 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2506 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2490 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2507 percpu_counter_destroy(&sbi->s_freeinodes_counter);
2491 percpu_counter_destroy(&sbi->s_dirs_counter); 2508 percpu_counter_destroy(&sbi->s_dirs_counter);
2509 percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
2492failed_mount2: 2510failed_mount2:
2493 for (i = 0; i < db_count; i++) 2511 for (i = 0; i < db_count; i++)
2494 brelse(sbi->s_group_desc[i]); 2512 brelse(sbi->s_group_desc[i]);
2495 kfree(sbi->s_group_desc); 2513 kfree(sbi->s_group_desc);
2496failed_mount: 2514failed_mount:
2515 if (sbi->s_proc) {
2516 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
2517 remove_proc_entry(sb->s_id, ext4_proc_root);
2518 }
2497#ifdef CONFIG_QUOTA 2519#ifdef CONFIG_QUOTA
2498 for (i = 0; i < MAXQUOTAS; i++) 2520 for (i = 0; i < MAXQUOTAS; i++)
2499 kfree(sbi->s_qf_names[i]); 2521 kfree(sbi->s_qf_names[i]);
@@ -2527,6 +2549,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
2527 journal->j_flags |= JBD2_BARRIER; 2549 journal->j_flags |= JBD2_BARRIER;
2528 else 2550 else
2529 journal->j_flags &= ~JBD2_BARRIER; 2551 journal->j_flags &= ~JBD2_BARRIER;
2552 if (test_opt(sb, DATA_ERR_ABORT))
2553 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
2554 else
2555 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
2530 spin_unlock(&journal->j_state_lock); 2556 spin_unlock(&journal->j_state_lock);
2531} 2557}
2532 2558
@@ -2552,7 +2578,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
2552 return NULL; 2578 return NULL;
2553 } 2579 }
2554 2580
2555 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 2581 jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2556 journal_inode, journal_inode->i_size); 2582 journal_inode, journal_inode->i_size);
2557 if (!S_ISREG(journal_inode->i_mode)) { 2583 if (!S_ISREG(journal_inode->i_mode)) {
2558 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); 2584 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
@@ -2715,6 +2741,11 @@ static int ext4_load_journal(struct super_block *sb,
2715 return -EINVAL; 2741 return -EINVAL;
2716 } 2742 }
2717 2743
2744 if (journal->j_flags & JBD2_BARRIER)
2745 printk(KERN_INFO "EXT4-fs: barriers enabled\n");
2746 else
2747 printk(KERN_INFO "EXT4-fs: barriers disabled\n");
2748
2718 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2749 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2719 err = jbd2_journal_update_format(journal); 2750 err = jbd2_journal_update_format(journal);
2720 if (err) { 2751 if (err) {
@@ -2799,13 +2830,34 @@ static void ext4_commit_super(struct super_block *sb,
2799 2830
2800 if (!sbh) 2831 if (!sbh)
2801 return; 2832 return;
2833 if (buffer_write_io_error(sbh)) {
2834 /*
2835 * Oh, dear. A previous attempt to write the
2836 * superblock failed. This could happen because the
2837 * USB device was yanked out. Or it could happen to
2838 * be a transient write error and maybe the block will
2839 * be remapped. Nothing we can do but to retry the
2840 * write and hope for the best.
2841 */
2842 printk(KERN_ERR "ext4: previous I/O error to "
2843 "superblock detected for %s.\n", sb->s_id);
2844 clear_buffer_write_io_error(sbh);
2845 set_buffer_uptodate(sbh);
2846 }
2802 es->s_wtime = cpu_to_le32(get_seconds()); 2847 es->s_wtime = cpu_to_le32(get_seconds());
2803 ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); 2848 ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb));
2804 es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 2849 es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
2805 BUFFER_TRACE(sbh, "marking dirty"); 2850 BUFFER_TRACE(sbh, "marking dirty");
2806 mark_buffer_dirty(sbh); 2851 mark_buffer_dirty(sbh);
2807 if (sync) 2852 if (sync) {
2808 sync_dirty_buffer(sbh); 2853 sync_dirty_buffer(sbh);
2854 if (buffer_write_io_error(sbh)) {
2855 printk(KERN_ERR "ext4: I/O error while writing "
2856 "superblock for %s.\n", sb->s_id);
2857 clear_buffer_write_io_error(sbh);
2858 set_buffer_uptodate(sbh);
2859 }
2860 }
2809} 2861}
2810 2862
2811 2863
@@ -2820,7 +2872,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
2820 journal_t *journal = EXT4_SB(sb)->s_journal; 2872 journal_t *journal = EXT4_SB(sb)->s_journal;
2821 2873
2822 jbd2_journal_lock_updates(journal); 2874 jbd2_journal_lock_updates(journal);
2823 jbd2_journal_flush(journal); 2875 if (jbd2_journal_flush(journal) < 0)
2876 goto out;
2877
2824 lock_super(sb); 2878 lock_super(sb);
2825 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 2879 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
2826 sb->s_flags & MS_RDONLY) { 2880 sb->s_flags & MS_RDONLY) {
@@ -2829,6 +2883,8 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
2829 ext4_commit_super(sb, es, 1); 2883 ext4_commit_super(sb, es, 1);
2830 } 2884 }
2831 unlock_super(sb); 2885 unlock_super(sb);
2886
2887out:
2832 jbd2_journal_unlock_updates(journal); 2888 jbd2_journal_unlock_updates(journal);
2833} 2889}
2834 2890
@@ -2907,6 +2963,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
2907{ 2963{
2908 tid_t target; 2964 tid_t target;
2909 2965
2966 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
2910 sb->s_dirt = 0; 2967 sb->s_dirt = 0;
2911 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 2968 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
2912 if (wait) 2969 if (wait)
@@ -2928,7 +2985,13 @@ static void ext4_write_super_lockfs(struct super_block *sb)
2928 2985
2929 /* Now we set up the journal barrier. */ 2986 /* Now we set up the journal barrier. */
2930 jbd2_journal_lock_updates(journal); 2987 jbd2_journal_lock_updates(journal);
2931 jbd2_journal_flush(journal); 2988
2989 /*
2990 * We don't want to clear needs_recovery flag when we failed
2991 * to flush the journal.
2992 */
2993 if (jbd2_journal_flush(journal) < 0)
2994 return;
2932 2995
2933 /* Journal blocked and flushed, clear needs_recovery flag. */ 2996 /* Journal blocked and flushed, clear needs_recovery flag. */
2934 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2997 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -3162,7 +3225,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3162 buf->f_type = EXT4_SUPER_MAGIC; 3225 buf->f_type = EXT4_SUPER_MAGIC;
3163 buf->f_bsize = sb->s_blocksize; 3226 buf->f_bsize = sb->s_blocksize;
3164 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3227 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3165 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); 3228 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3229 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3166 ext4_free_blocks_count_set(es, buf->f_bfree); 3230 ext4_free_blocks_count_set(es, buf->f_bfree);
3167 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3231 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3168 if (buf->f_bfree < ext4_r_blocks_count(es)) 3232 if (buf->f_bfree < ext4_r_blocks_count(es))
@@ -3367,8 +3431,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3367 * otherwise be livelocked... 3431 * otherwise be livelocked...
3368 */ 3432 */
3369 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 3433 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3370 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3434 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3371 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3435 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3436 if (err) {
3437 path_put(&nd.path);
3438 return err;
3439 }
3372 } 3440 }
3373 3441
3374 err = vfs_quota_on_path(sb, type, format_id, &nd.path); 3442 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
@@ -3432,7 +3500,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3432 handle_t *handle = journal_current_handle(); 3500 handle_t *handle = journal_current_handle();
3433 3501
3434 if (!handle) { 3502 if (!handle) {
3435 printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)" 3503 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
3436 " cancelled because transaction is not started.\n", 3504 " cancelled because transaction is not started.\n",
3437 (unsigned long long)off, (unsigned long long)len); 3505 (unsigned long long)off, (unsigned long long)len);
3438 return -EIO; 3506 return -EIO;
@@ -3493,18 +3561,82 @@ static int ext4_get_sb(struct file_system_type *fs_type,
3493 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3561 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3494} 3562}
3495 3563
3564#ifdef CONFIG_PROC_FS
3565static int ext4_ui_proc_show(struct seq_file *m, void *v)
3566{
3567 unsigned int *p = m->private;
3568
3569 seq_printf(m, "%u\n", *p);
3570 return 0;
3571}
3572
3573static int ext4_ui_proc_open(struct inode *inode, struct file *file)
3574{
3575 return single_open(file, ext4_ui_proc_show, PDE(inode)->data);
3576}
3577
3578static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
3579 size_t cnt, loff_t *ppos)
3580{
3581 unsigned int *p = PDE(file->f_path.dentry->d_inode)->data;
3582 char str[32];
3583 unsigned long value;
3584
3585 if (cnt >= sizeof(str))
3586 return -EINVAL;
3587 if (copy_from_user(str, buf, cnt))
3588 return -EFAULT;
3589 value = simple_strtol(str, NULL, 0);
3590 if (value < 0)
3591 return -ERANGE;
3592 *p = value;
3593 return cnt;
3594}
3595
3596const struct file_operations ext4_ui_proc_fops = {
3597 .owner = THIS_MODULE,
3598 .open = ext4_ui_proc_open,
3599 .read = seq_read,
3600 .llseek = seq_lseek,
3601 .release = single_release,
3602 .write = ext4_ui_proc_write,
3603};
3604#endif
3605
3606static struct file_system_type ext4_fs_type = {
3607 .owner = THIS_MODULE,
3608 .name = "ext4",
3609 .get_sb = ext4_get_sb,
3610 .kill_sb = kill_block_super,
3611 .fs_flags = FS_REQUIRES_DEV,
3612};
3613
3614#ifdef CONFIG_EXT4DEV_COMPAT
3615static int ext4dev_get_sb(struct file_system_type *fs_type,
3616 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3617{
3618 printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
3619 "to mount using ext4\n");
3620 printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
3621 "will go away by 2.6.31\n");
3622 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3623}
3624
3496static struct file_system_type ext4dev_fs_type = { 3625static struct file_system_type ext4dev_fs_type = {
3497 .owner = THIS_MODULE, 3626 .owner = THIS_MODULE,
3498 .name = "ext4dev", 3627 .name = "ext4dev",
3499 .get_sb = ext4_get_sb, 3628 .get_sb = ext4dev_get_sb,
3500 .kill_sb = kill_block_super, 3629 .kill_sb = kill_block_super,
3501 .fs_flags = FS_REQUIRES_DEV, 3630 .fs_flags = FS_REQUIRES_DEV,
3502}; 3631};
3632MODULE_ALIAS("ext4dev");
3633#endif
3503 3634
3504static int __init init_ext4_fs(void) 3635static int __init init_ext4_fs(void)
3505{ 3636{
3506 int err; 3637 int err;
3507 3638
3639 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3508 err = init_ext4_mballoc(); 3640 err = init_ext4_mballoc();
3509 if (err) 3641 if (err)
3510 return err; 3642 return err;
@@ -3515,9 +3647,16 @@ static int __init init_ext4_fs(void)
3515 err = init_inodecache(); 3647 err = init_inodecache();
3516 if (err) 3648 if (err)
3517 goto out1; 3649 goto out1;
3518 err = register_filesystem(&ext4dev_fs_type); 3650 err = register_filesystem(&ext4_fs_type);
3519 if (err) 3651 if (err)
3520 goto out; 3652 goto out;
3653#ifdef CONFIG_EXT4DEV_COMPAT
3654 err = register_filesystem(&ext4dev_fs_type);
3655 if (err) {
3656 unregister_filesystem(&ext4_fs_type);
3657 goto out;
3658 }
3659#endif
3521 return 0; 3660 return 0;
3522out: 3661out:
3523 destroy_inodecache(); 3662 destroy_inodecache();
@@ -3530,10 +3669,14 @@ out2:
3530 3669
3531static void __exit exit_ext4_fs(void) 3670static void __exit exit_ext4_fs(void)
3532{ 3671{
3672 unregister_filesystem(&ext4_fs_type);
3673#ifdef CONFIG_EXT4DEV_COMPAT
3533 unregister_filesystem(&ext4dev_fs_type); 3674 unregister_filesystem(&ext4dev_fs_type);
3675#endif
3534 destroy_inodecache(); 3676 destroy_inodecache();
3535 exit_ext4_xattr(); 3677 exit_ext4_xattr();
3536 exit_ext4_mballoc(); 3678 exit_ext4_mballoc();
3679 remove_proc_entry("fs/ext4", NULL);
3537} 3680}
3538 3681
3539MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3682MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e9178643dc01..00740cb32be3 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -23,10 +23,10 @@
23#include "ext4.h" 23#include "ext4.h"
24#include "xattr.h" 24#include "xattr.h"
25 25
26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
27{ 27{
28 struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); 28 struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
29 nd_set_link(nd, (char*)ei->i_data); 29 nd_set_link(nd, (char *) ei->i_data);
30 return NULL; 30 return NULL;
31} 31}
32 32
@@ -34,7 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
34 .readlink = generic_readlink, 34 .readlink = generic_readlink,
35 .follow_link = page_follow_link_light, 35 .follow_link = page_follow_link_light,
36 .put_link = page_put_link, 36 .put_link = page_put_link,
37#ifdef CONFIG_EXT4DEV_FS_XATTR 37#ifdef CONFIG_EXT4_FS_XATTR
38 .setxattr = generic_setxattr, 38 .setxattr = generic_setxattr,
39 .getxattr = generic_getxattr, 39 .getxattr = generic_getxattr,
40 .listxattr = ext4_listxattr, 40 .listxattr = ext4_listxattr,
@@ -45,7 +45,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
45const struct inode_operations ext4_fast_symlink_inode_operations = { 45const struct inode_operations ext4_fast_symlink_inode_operations = {
46 .readlink = generic_readlink, 46 .readlink = generic_readlink,
47 .follow_link = ext4_follow_link, 47 .follow_link = ext4_follow_link,
48#ifdef CONFIG_EXT4DEV_FS_XATTR 48#ifdef CONFIG_EXT4_FS_XATTR
49 .setxattr = generic_setxattr, 49 .setxattr = generic_setxattr,
50 .getxattr = generic_getxattr, 50 .getxattr = generic_getxattr,
51 .listxattr = ext4_listxattr, 51 .listxattr = ext4_listxattr,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 8954208b4893..80626d516fee 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache;
99 99
100static struct xattr_handler *ext4_xattr_handler_map[] = { 100static struct xattr_handler *ext4_xattr_handler_map[] = {
101 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, 101 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
102#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 102#ifdef CONFIG_EXT4_FS_POSIX_ACL
103 [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, 103 [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler,
104 [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, 104 [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler,
105#endif 105#endif
106 [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, 106 [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler,
107#ifdef CONFIG_EXT4DEV_FS_SECURITY 107#ifdef CONFIG_EXT4_FS_SECURITY
108 [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, 108 [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler,
109#endif 109#endif
110}; 110};
@@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_handler_map[] = {
112struct xattr_handler *ext4_xattr_handlers[] = { 112struct xattr_handler *ext4_xattr_handlers[] = {
113 &ext4_xattr_user_handler, 113 &ext4_xattr_user_handler,
114 &ext4_xattr_trusted_handler, 114 &ext4_xattr_trusted_handler,
115#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 115#ifdef CONFIG_EXT4_FS_POSIX_ACL
116 &ext4_xattr_acl_access_handler, 116 &ext4_xattr_acl_access_handler,
117 &ext4_xattr_acl_default_handler, 117 &ext4_xattr_acl_default_handler,
118#endif 118#endif
119#ifdef CONFIG_EXT4DEV_FS_SECURITY 119#ifdef CONFIG_EXT4_FS_SECURITY
120 &ext4_xattr_security_handler, 120 &ext4_xattr_security_handler,
121#endif 121#endif
122 NULL 122 NULL
@@ -959,6 +959,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
959 struct ext4_xattr_block_find bs = { 959 struct ext4_xattr_block_find bs = {
960 .s = { .not_found = -ENODATA, }, 960 .s = { .not_found = -ENODATA, },
961 }; 961 };
962 unsigned long no_expand;
962 int error; 963 int error;
963 964
964 if (!name) 965 if (!name)
@@ -966,6 +967,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
966 if (strlen(name) > 255) 967 if (strlen(name) > 255)
967 return -ERANGE; 968 return -ERANGE;
968 down_write(&EXT4_I(inode)->xattr_sem); 969 down_write(&EXT4_I(inode)->xattr_sem);
970 no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND;
971 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
972
969 error = ext4_get_inode_loc(inode, &is.iloc); 973 error = ext4_get_inode_loc(inode, &is.iloc);
970 if (error) 974 if (error)
971 goto cleanup; 975 goto cleanup;
@@ -1042,6 +1046,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1042cleanup: 1046cleanup:
1043 brelse(is.iloc.bh); 1047 brelse(is.iloc.bh);
1044 brelse(bs.bh); 1048 brelse(bs.bh);
1049 if (no_expand == 0)
1050 EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND;
1045 up_write(&EXT4_I(inode)->xattr_sem); 1051 up_write(&EXT4_I(inode)->xattr_sem);
1046 return error; 1052 return error;
1047} 1053}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 5992fe979bb9..8ede88b18c29 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -51,8 +51,8 @@ struct ext4_xattr_entry {
51 (((name_len) + EXT4_XATTR_ROUND + \ 51 (((name_len) + EXT4_XATTR_ROUND + \
52 sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) 52 sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
53#define EXT4_XATTR_NEXT(entry) \ 53#define EXT4_XATTR_NEXT(entry) \
54 ( (struct ext4_xattr_entry *)( \ 54 ((struct ext4_xattr_entry *)( \
55 (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) ) 55 (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)))
56#define EXT4_XATTR_SIZE(size) \ 56#define EXT4_XATTR_SIZE(size) \
57 (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) 57 (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
58 58
@@ -63,7 +63,7 @@ struct ext4_xattr_entry {
63 EXT4_I(inode)->i_extra_isize)) 63 EXT4_I(inode)->i_extra_isize))
64#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) 64#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
65 65
66# ifdef CONFIG_EXT4DEV_FS_XATTR 66# ifdef CONFIG_EXT4_FS_XATTR
67 67
68extern struct xattr_handler ext4_xattr_user_handler; 68extern struct xattr_handler ext4_xattr_user_handler;
69extern struct xattr_handler ext4_xattr_trusted_handler; 69extern struct xattr_handler ext4_xattr_trusted_handler;
@@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void);
88 88
89extern struct xattr_handler *ext4_xattr_handlers[]; 89extern struct xattr_handler *ext4_xattr_handlers[];
90 90
91# else /* CONFIG_EXT4DEV_FS_XATTR */ 91# else /* CONFIG_EXT4_FS_XATTR */
92 92
93static inline int 93static inline int
94ext4_xattr_get(struct inode *inode, int name_index, const char *name, 94ext4_xattr_get(struct inode *inode, int name_index, const char *name,
@@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
141 141
142#define ext4_xattr_handlers NULL 142#define ext4_xattr_handlers NULL
143 143
144# endif /* CONFIG_EXT4DEV_FS_XATTR */ 144# endif /* CONFIG_EXT4_FS_XATTR */
145 145
146#ifdef CONFIG_EXT4DEV_FS_SECURITY 146#ifdef CONFIG_EXT4_FS_SECURITY
147extern int ext4_init_security(handle_t *handle, struct inode *inode, 147extern int ext4_init_security(handle_t *handle, struct inode *inode,
148 struct inode *dir); 148 struct inode *dir);
149#else 149#else