author     Theodore Ts'o <tytso@mit.edu>   2009-03-12 12:18:34 -0400
committer  Theodore Ts'o <tytso@mit.edu>   2009-03-12 12:18:34 -0400
commit     a4912123b688e057084e6557cef8924f7ae5bbde
tree       34e88705d6617b52caa0f87692b480119a9c9e2e
parent     2dc6b0d48ca0599837df21b14bb8393d0804af57
ext4: New inode/block allocation algorithms for flex_bg filesystems
The find_group_flex() inode allocator is now only used if the filesystem
is mounted using the "oldalloc" mount option.  It is replaced with the
original Orlov allocator that has been updated for flex_bg filesystems
(it should behave the same way if flex_bg is disabled).  The inode
allocator now takes each flex_bg group into account, instead of each
block group, when deciding whether or not it's time to allocate a new
directory into a fresh flex_bg.

The block allocator has also been changed so that the first block group
in each flex_bg is preferred for storing directory blocks.  This keeps
directory blocks close together, which is good for speeding up e2fsck
since large directories are more likely to look like this:

debugfs:  stat /home/tytso/Maildir/cur
Inode: 1844562   Type: directory    Mode:  0700   Flags: 0x81000
Generation: 1132745781    Version: 0x00000000:0000ad71
User: 15806   Group: 15806   Size: 1060864
File ACL: 0    Directory ACL: 0
Links: 2   Blockcount: 2072
Fragment:  Address: 0    Number: 0    Size: 0
 ctime: 0x499c0ff4:164961f4 -- Wed Feb 18 08:41:08 2009
 atime: 0x499c0ff4:00000000 -- Wed Feb 18 08:41:08 2009
 mtime: 0x49957f51:00000000 -- Fri Feb 13 09:10:25 2009
crtime: 0x499c0f57:00d51440 -- Wed Feb 18 08:38:31 2009
Size of extra inode fields: 28
BLOCKS:
(0):7348651, (1-258):7348654-7348911
TOTAL: 259

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
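[Editor's note] To make the directory-packing policy above concrete, here is a
minimal, self-contained userspace sketch (not part of the patch) of how a goal
block group is derived from an inode's block group under this scheme:
directories and special files target the first block group of their flexgroup,
while regular files start at the second.  The EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
threshold and the bit-masking mirror the changes to ext4_ext_find_goal() and
ext4_find_near() below; goal_group(), is_regular_file and the example numbers
are hypothetical illustrations only.

#include <stdio.h>

#define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4

static unsigned int goal_group(unsigned int block_group, unsigned int flex_size,
                               int is_regular_file)
{
        if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
                /* first block group of the flexgroup (flex_size is a power of two) */
                block_group &= ~(flex_size - 1);
                if (is_regular_file)
                        block_group++;  /* regular files start one group later */
        }
        return block_group;
}

int main(void)
{
        /* flex_size = 16: an inode in group 37 belongs to the flexgroup
         * that starts at group 32 */
        printf("directory goal group: %u\n", goal_group(37, 16, 0)); /* 32 */
        printf("file goal group:      %u\n", goal_group(37, 16, 1)); /* 33 */
        return 0;
}

With flex_size = 16, an inode whose parent sits in block group 37 maps to the
flexgroup starting at group 32: directory blocks are steered toward group 32,
regular-file data toward group 33 and beyond.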
 fs/ext4/ext4.h    |   6
 fs/ext4/ext4_i.h  |   3
 fs/ext4/extents.c |  25
 fs/ext4/ialloc.c  | 215
 fs/ext4/inode.c   |  18
 fs/ext4/mballoc.c |   7
 6 files changed, 216 insertions(+), 58 deletions(-)
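[Editor's note] Before the diffs, a short hedged sketch of the statistics step
used by the reworked Orlov allocator in fs/ext4/ialloc.c: free-inode,
free-block and used-directory counts are summed over every block group in a
flexgroup (compare the new get_orlov_stats() helper below).  The struct layout
and the aggregation loop mirror the patch; get_flex_stats(), the plain arrays
and the sample numbers are stand-ins for the kernel's group-descriptor lookups.

#include <stdio.h>

struct orlov_stats {
        unsigned int free_inodes;
        unsigned int free_blocks;
        unsigned int used_dirs;
};

/* Sum per-block-group counters over one flexgroup; the arrays stand in
 * for ext4_get_group_desc() lookups. */
static void get_flex_stats(const unsigned int *free_inodes,
                           const unsigned int *free_blocks,
                           const unsigned int *used_dirs,
                           unsigned int ngroups, unsigned int flex,
                           unsigned int flex_size, struct orlov_stats *stats)
{
        unsigned int g = flex * flex_size;
        unsigned int i;

        stats->free_inodes = 0;
        stats->free_blocks = 0;
        stats->used_dirs = 0;

        for (i = 0; i < flex_size && g < ngroups; i++, g++) {
                stats->free_inodes += free_inodes[g];
                stats->free_blocks += free_blocks[g];
                stats->used_dirs += used_dirs[g];
        }
}

int main(void)
{
        /* eight block groups = two flexgroups of four */
        unsigned int fi[8] = { 10, 20, 30, 40, 50, 60, 70, 80 };
        unsigned int fb[8] = { 100, 200, 300, 400, 500, 600, 700, 800 };
        unsigned int ud[8] = { 0, 1, 0, 2, 1, 0, 3, 1 };
        struct orlov_stats s;

        get_flex_stats(fi, fb, ud, 8, 1, 4, &s); /* flexgroup 1 = groups 4..7 */
        printf("free_inodes=%u free_blocks=%u used_dirs=%u\n",
               s.free_inodes, s.free_blocks, s.used_dirs);
        return 0;
}

Working on flexgroup-sized aggregates like this is what lets the allocator
compare whole flexgroups against the max_dirs/min_inodes/min_blocks thresholds
instead of individual block groups.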
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2cbfc0b04d37..096456c8559b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -828,6 +828,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 #define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */
 
 /*
+ * Minimum number of groups in a flexgroup before we separate out
+ * directories into the first block group of a flexgroup
+ */
+#define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4
+
+/*
  * Structure of a directory entry
  */
 #define EXT4_NAME_LEN 255
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index 2d516c0a22af..4ce2187123aa 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -122,6 +122,9 @@ struct ext4_inode_info {
         struct list_head i_prealloc_list;
         spinlock_t i_prealloc_lock;
 
+        /* ialloc */
+        ext4_group_t    i_last_alloc_group;
+
         /* allocation reservation info for delalloc */
         unsigned int i_reserved_data_blocks;
         unsigned int i_reserved_meta_blocks;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e0aa4fe4f596..aa3431856c9a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -152,6 +152,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
         ext4_fsblk_t bg_start;
         ext4_fsblk_t last_block;
         ext4_grpblk_t colour;
+        ext4_group_t block_group;
+        int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
         int depth;
 
         if (path) {
@@ -170,10 +172,31 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
         }
 
         /* OK. use inode's group */
-        bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
+        block_group = ei->i_block_group;
+        if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
+                /*
+                 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
+                 * block groups per flexgroup, reserve the first block
+                 * group for directories and special files.  Regular
+                 * files will start at the second block group.  This
+                 * tends to speed up directory access and improves
+                 * fsck times.
+                 */
+                block_group &= ~(flex_size-1);
+                if (S_ISREG(inode->i_mode))
+                        block_group++;
+        }
+        bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
                 le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
         last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
 
+        /*
+         * If we are doing delayed allocation, we don't need to take
+         * colour into account.
+         */
+        if (test_opt(inode->i_sb, DELALLOC))
+                return bg_start;
+
         if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
                 colour = (current->pid % 16) *
                         (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ae3eb57dccdd..617f5a2d800a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -410,6 +410,43 @@ out:
         return 0;
 }
 
+struct orlov_stats {
+        __u32 free_inodes;
+        __u32 free_blocks;
+        __u32 used_dirs;
+};
+
+/*
+ * Helper function for Orlov's allocator; returns critical information
+ * for a particular block group or flex_bg.  If flex_size is 1, then g
+ * is a block group number; otherwise it is a flex_bg number.
+ */
+void get_orlov_stats(struct super_block *sb, ext4_group_t g,
+                     int flex_size, struct orlov_stats *stats)
+{
+        struct ext4_group_desc *desc;
+        ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+        int i;
+
+        stats->free_inodes = 0;
+        stats->free_blocks = 0;
+        stats->used_dirs = 0;
+
+        g *= flex_size;
+
+        for (i = 0; i < flex_size; i++) {
+                if (g >= ngroups)
+                        break;
+                desc = ext4_get_group_desc(sb, g++, NULL);
+                if (!desc)
+                        continue;
+
+                stats->free_inodes += ext4_free_inodes_count(sb, desc);
+                stats->free_blocks += ext4_free_blks_count(sb, desc);
+                stats->used_dirs += ext4_used_dirs_count(sb, desc);
+        }
+}
+
 /*
  * Orlov's allocator for directories.
  *
@@ -425,35 +462,34 @@ out:
  * it has too many directories already (max_dirs) or
  * it has too few free inodes left (min_inodes) or
  * it has too few free blocks left (min_blocks) or
- * it's already running too large debt (max_debt).
  * Parent's group is preferred, if it doesn't satisfy these
  * conditions we search cyclically through the rest. If none
  * of the groups look good we just look for a group with more
  * free inodes than average (starting at parent's group).
- *
- * Debt is incremented each time we allocate a directory and decremented
- * when we allocate an inode, within 0--255.
  */
 
-#define INODE_COST 64
-#define BLOCK_COST 256
-
 static int find_group_orlov(struct super_block *sb, struct inode *parent,
-                            ext4_group_t *group)
+                            ext4_group_t *group, int mode)
 {
         ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
-        struct ext4_super_block *es = sbi->s_es;
         ext4_group_t ngroups = sbi->s_groups_count;
         int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
         unsigned int freei, avefreei;
         ext4_fsblk_t freeb, avefreeb;
-        ext4_fsblk_t blocks_per_dir;
         unsigned int ndirs;
-        int max_debt, max_dirs, min_inodes;
+        int max_dirs, min_inodes;
         ext4_grpblk_t min_blocks;
-        ext4_group_t i;
+        ext4_group_t i, grp, g;
         struct ext4_group_desc *desc;
+        struct orlov_stats stats;
+        int flex_size = ext4_flex_bg_size(sbi);
+
+        if (flex_size > 1) {
+                ngroups = (ngroups + flex_size - 1) >>
+                        sbi->s_log_groups_per_flex;
+                parent_group >>= sbi->s_log_groups_per_flex;
+        }
 
         freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
         avefreei = freei / ngroups;
@@ -462,71 +498,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
         do_div(avefreeb, ngroups);
         ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
 
-        if ((parent == sb->s_root->d_inode) ||
-            (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) {
+        if (S_ISDIR(mode) &&
+            ((parent == sb->s_root->d_inode) ||
+             (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
                 int best_ndir = inodes_per_group;
-                ext4_group_t grp;
                 int ret = -1;
 
                 get_random_bytes(&grp, sizeof(grp));
                 parent_group = (unsigned)grp % ngroups;
                 for (i = 0; i < ngroups; i++) {
-                        grp = (parent_group + i) % ngroups;
-                        desc = ext4_get_group_desc(sb, grp, NULL);
-                        if (!desc || !ext4_free_inodes_count(sb, desc))
+                        g = (parent_group + i) % ngroups;
+                        get_orlov_stats(sb, g, flex_size, &stats);
+                        if (!stats.free_inodes)
                                 continue;
-                        if (ext4_used_dirs_count(sb, desc) >= best_ndir)
+                        if (stats.used_dirs >= best_ndir)
                                 continue;
-                        if (ext4_free_inodes_count(sb, desc) < avefreei)
+                        if (stats.free_inodes < avefreei)
                                 continue;
-                        if (ext4_free_blks_count(sb, desc) < avefreeb)
+                        if (stats.free_blocks < avefreeb)
                                 continue;
-                        *group = grp;
+                        grp = g;
                         ret = 0;
-                        best_ndir = ext4_used_dirs_count(sb, desc);
+                        best_ndir = stats.used_dirs;
+                }
+                if (ret)
+                        goto fallback;
+        found_flex_bg:
+                if (flex_size == 1) {
+                        *group = grp;
+                        return 0;
+                }
+
+                /*
+                 * We pack inodes at the beginning of the flexgroup's
+                 * inode tables.  Block allocation decisions will do
+                 * something similar, although regular files will
+                 * start at the 2nd block group of the flexgroup.  See
+                 * ext4_ext_find_goal() and ext4_find_near().
+                 */
+                grp *= flex_size;
+                for (i = 0; i < flex_size; i++) {
+                        if (grp+i >= sbi->s_groups_count)
+                                break;
+                        desc = ext4_get_group_desc(sb, grp+i, NULL);
+                        if (desc && ext4_free_inodes_count(sb, desc)) {
+                                *group = grp+i;
+                                return 0;
+                        }
                 }
-                if (ret == 0)
-                        return ret;
                 goto fallback;
         }
 
-        blocks_per_dir = ext4_blocks_count(es) - freeb;
-        do_div(blocks_per_dir, ndirs);
-
         max_dirs = ndirs / ngroups + inodes_per_group / 16;
-        min_inodes = avefreei - inodes_per_group / 4;
-        min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4;
-
-        max_debt = EXT4_BLOCKS_PER_GROUP(sb);
-        max_debt /= max_t(int, blocks_per_dir, BLOCK_COST);
-        if (max_debt * INODE_COST > inodes_per_group)
-                max_debt = inodes_per_group / INODE_COST;
-        if (max_debt > 255)
-                max_debt = 255;
-        if (max_debt == 0)
-                max_debt = 1;
+        min_inodes = avefreei - inodes_per_group*flex_size / 4;
+        if (min_inodes < 1)
+                min_inodes = 1;
+        min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4;
+
+        /*
+         * Start looking in the flex group where we last allocated an
+         * inode for this parent directory
+         */
+        if (EXT4_I(parent)->i_last_alloc_group != ~0) {
+                parent_group = EXT4_I(parent)->i_last_alloc_group;
+                if (flex_size > 1)
+                        parent_group >>= sbi->s_log_groups_per_flex;
+        }
 
         for (i = 0; i < ngroups; i++) {
-                *group = (parent_group + i) % ngroups;
-                desc = ext4_get_group_desc(sb, *group, NULL);
-                if (!desc || !ext4_free_inodes_count(sb, desc))
-                        continue;
-                if (ext4_used_dirs_count(sb, desc) >= max_dirs)
+                grp = (parent_group + i) % ngroups;
+                get_orlov_stats(sb, grp, flex_size, &stats);
+                if (stats.used_dirs >= max_dirs)
                         continue;
-                if (ext4_free_inodes_count(sb, desc) < min_inodes)
+                if (stats.free_inodes < min_inodes)
                         continue;
-                if (ext4_free_blks_count(sb, desc) < min_blocks)
+                if (stats.free_blocks < min_blocks)
                         continue;
-                return 0;
+                goto found_flex_bg;
         }
 
 fallback:
+        ngroups = sbi->s_groups_count;
+        avefreei = freei / ngroups;
+        parent_group = EXT4_I(parent)->i_block_group;
         for (i = 0; i < ngroups; i++) {
-                *group = (parent_group + i) % ngroups;
-                desc = ext4_get_group_desc(sb, *group, NULL);
+                grp = (parent_group + i) % ngroups;
+                desc = ext4_get_group_desc(sb, grp, NULL);
                 if (desc && ext4_free_inodes_count(sb, desc) &&
-                    ext4_free_inodes_count(sb, desc) >= avefreei)
+                    ext4_free_inodes_count(sb, desc) >= avefreei) {
+                        *group = grp;
                         return 0;
+                }
         }
 
         if (avefreei) {
@@ -542,12 +604,51 @@ fallback:
 }
 
 static int find_group_other(struct super_block *sb, struct inode *parent,
-                                ext4_group_t *group)
+                                ext4_group_t *group, int mode)
 {
         ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
         ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
         struct ext4_group_desc *desc;
-        ext4_group_t i;
+        ext4_group_t i, last;
+        int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
+
+        /*
+         * Try to place the inode in the same flex group as its
+         * parent.  If we can't find space, use the Orlov algorithm to
+         * find another flex group, and store that information in the
+         * parent directory's inode information so that we use that
+         * flex group for future allocations.
+         */
+        if (flex_size > 1) {
+                int retry = 0;
+
+        try_again:
+                parent_group &= ~(flex_size-1);
+                last = parent_group + flex_size;
+                if (last > ngroups)
+                        last = ngroups;
+                for (i = parent_group; i < last; i++) {
+                        desc = ext4_get_group_desc(sb, i, NULL);
+                        if (desc && ext4_free_inodes_count(sb, desc)) {
+                                *group = i;
+                                return 0;
+                        }
+                }
+                if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
+                        retry = 1;
+                        parent_group = EXT4_I(parent)->i_last_alloc_group;
+                        goto try_again;
+                }
+                /*
+                 * If this didn't work, use the Orlov search algorithm
+                 * to find a new flex group; we pass in the mode to
+                 * avoid the topdir algorithms.
+                 */
+                *group = parent_group + flex_size;
+                if (*group > ngroups)
+                        *group = 0;
+                return find_group_orlov(sb, parent, group, mode);
+        }
 
         /*
          * Try to place the inode in its parent directory
@@ -716,10 +817,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
         sbi = EXT4_SB(sb);
         es = sbi->s_es;
 
-        if (sbi->s_log_groups_per_flex) {
+        if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
                 ret2 = find_group_flex(sb, dir, &group);
                 if (ret2 == -1) {
-                        ret2 = find_group_other(sb, dir, &group);
+                        ret2 = find_group_other(sb, dir, &group, mode);
                         if (ret2 == 0 && once)
                                 once = 0;
                         printk(KERN_NOTICE "ext4: find_group_flex "
@@ -733,11 +834,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
                 if (test_opt(sb, OLDALLOC))
                         ret2 = find_group_dir(sb, dir, &group);
                 else
-                        ret2 = find_group_orlov(sb, dir, &group);
+                        ret2 = find_group_orlov(sb, dir, &group, mode);
         } else
-                ret2 = find_group_other(sb, dir, &group);
+                ret2 = find_group_other(sb, dir, &group, mode);
 
 got_group:
+        EXT4_I(dir)->i_last_alloc_group = group;
         err = -ENOSPC;
         if (ret2 == -1)
                 goto out;
@@ -894,6 +996,7 @@ got:
         ei->i_file_acl = 0;
         ei->i_dtime = 0;
         ei->i_block_group = group;
+        ei->i_last_alloc_group = ~0;
 
         ext4_set_inode_flags(inode);
         if (IS_DIRSYNC(inode))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 71d3ecd5db79..25811507d2b0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -459,6 +459,8 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
         ext4_fsblk_t bg_start;
         ext4_fsblk_t last_block;
         ext4_grpblk_t colour;
+        ext4_group_t block_group;
+        int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
 
         /* Try to find previous block */
         for (p = ind->p - 1; p >= start; p--) {
@@ -474,9 +476,22 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
          * It is going to be referred to from the inode itself? OK, just put it
          * into the same cylinder group then.
          */
-        bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group);
+        block_group = ei->i_block_group;
+        if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
+                block_group &= ~(flex_size-1);
+                if (S_ISREG(inode->i_mode))
+                        block_group++;
+        }
+        bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
         last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
 
+        /*
+         * If we are doing delayed allocation, we don't need to take
+         * colour into account.
+         */
+        if (test_opt(inode->i_sb, DELALLOC))
+                return bg_start;
+
         if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
                 colour = (current->pid % 16) *
                         (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
@@ -4287,6 +4302,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
         ei->i_disksize = inode->i_size;
         inode->i_generation = le32_to_cpu(raw_inode->i_generation);
         ei->i_block_group = iloc.block_group;
+        ei->i_last_alloc_group = ~0;
         /*
          * NOTE! The in-memory inode i_data array is in little-endian order
          * even on big-endian machines: we do NOT byteswap the block numbers!
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b038188bd039..b0d6022eaa67 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1726,6 +1726,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 {
         unsigned free, fragments;
         unsigned i, bits;
+        int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
         struct ext4_group_desc *desc;
         struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
 
@@ -1747,6 +1748,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
         if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
                 return 0;
 
+        /* Avoid using the first bg of a flexgroup for data files */
+        if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
+            (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
+            ((group % flex_size) == 0))
+                return 0;
+
         bits = ac->ac_sb->s_blocksize_bits + 1;
         for (i = ac->ac_2order; i <= bits; i++)
                 if (grp->bb_counters[i] > 0)