diff options
-rw-r--r-- | fs/ext4/ext4.h | 6 | ||||
-rw-r--r-- | fs/ext4/ext4_i.h | 3 | ||||
-rw-r--r-- | fs/ext4/extents.c | 25 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 215 | ||||
-rw-r--r-- | fs/ext4/inode.c | 18 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 7 |
6 files changed, 216 insertions, 58 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2cbfc0b04d37..096456c8559b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -828,6 +828,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
828 | #define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ | 828 | #define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ |
829 | 829 | ||
830 | /* | 830 | /* |
831 | * Minimum number of groups in a flexgroup before we separate out | ||
832 | * directories into the first block group of a flexgroup | ||
833 | */ | ||
834 | #define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4 | ||
835 | |||
836 | /* | ||
831 | * Structure of a directory entry | 837 | * Structure of a directory entry |
832 | */ | 838 | */ |
833 | #define EXT4_NAME_LEN 255 | 839 | #define EXT4_NAME_LEN 255 |
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index 2d516c0a22af..4ce2187123aa 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h | |||
@@ -122,6 +122,9 @@ struct ext4_inode_info { | |||
122 | struct list_head i_prealloc_list; | 122 | struct list_head i_prealloc_list; |
123 | spinlock_t i_prealloc_lock; | 123 | spinlock_t i_prealloc_lock; |
124 | 124 | ||
125 | /* ialloc */ | ||
126 | ext4_group_t i_last_alloc_group; | ||
127 | |||
125 | /* allocation reservation info for delalloc */ | 128 | /* allocation reservation info for delalloc */ |
126 | unsigned int i_reserved_data_blocks; | 129 | unsigned int i_reserved_data_blocks; |
127 | unsigned int i_reserved_meta_blocks; | 130 | unsigned int i_reserved_meta_blocks; |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e0aa4fe4f596..aa3431856c9a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -152,6 +152,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
152 | ext4_fsblk_t bg_start; | 152 | ext4_fsblk_t bg_start; |
153 | ext4_fsblk_t last_block; | 153 | ext4_fsblk_t last_block; |
154 | ext4_grpblk_t colour; | 154 | ext4_grpblk_t colour; |
155 | ext4_group_t block_group; | ||
156 | int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); | ||
155 | int depth; | 157 | int depth; |
156 | 158 | ||
157 | if (path) { | 159 | if (path) { |
@@ -170,10 +172,31 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
170 | } | 172 | } |
171 | 173 | ||
172 | /* OK. use inode's group */ | 174 | /* OK. use inode's group */ |
173 | bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + | 175 | block_group = ei->i_block_group; |
176 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { | ||
177 | /* | ||
178 | * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME | ||
179 | * block groups per flexgroup, reserve the first block | ||
180 | * group for directories and special files. Regular | ||
181 | * files will start at the second block group. This | ||
182 | * tends to speed up directory access and improves | ||
183 | * fsck times. | ||
184 | */ | ||
185 | block_group &= ~(flex_size-1); | ||
186 | if (S_ISREG(inode->i_mode)) | ||
187 | block_group++; | ||
188 | } | ||
189 | bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + | ||
174 | le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); | 190 | le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); |
175 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; | 191 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; |
176 | 192 | ||
193 | /* | ||
194 | * If we are doing delayed allocation, we don't need to take | ||
195 | * colour into account. | ||
196 | */ | ||
197 | if (test_opt(inode->i_sb, DELALLOC)) | ||
198 | return bg_start; | ||
199 | |||
177 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) | 200 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) |
178 | colour = (current->pid % 16) * | 201 | colour = (current->pid % 16) * |
179 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); | 202 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index ae3eb57dccdd..617f5a2d800a 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -410,6 +410,43 @@ out: | |||
410 | return 0; | 410 | return 0; |
411 | } | 411 | } |
412 | 412 | ||
413 | struct orlov_stats { | ||
414 | __u32 free_inodes; | ||
415 | __u32 free_blocks; | ||
416 | __u32 used_dirs; | ||
417 | }; | ||
418 | |||
419 | /* | ||
420 | * Helper function for Orlov's allocator; returns critical information | ||
421 | * for a particular block group or flex_bg. If flex_size is 1, then g | ||
422 | * is a block group number; otherwise it is a flex_bg number. | ||
423 | */ | ||
424 | void get_orlov_stats(struct super_block *sb, ext4_group_t g, | ||
425 | int flex_size, struct orlov_stats *stats) | ||
426 | { | ||
427 | struct ext4_group_desc *desc; | ||
428 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | ||
429 | int i; | ||
430 | |||
431 | stats->free_inodes = 0; | ||
432 | stats->free_blocks = 0; | ||
433 | stats->used_dirs = 0; | ||
434 | |||
435 | g *= flex_size; | ||
436 | |||
437 | for (i = 0; i < flex_size; i++) { | ||
438 | if (g >= ngroups) | ||
439 | break; | ||
440 | desc = ext4_get_group_desc(sb, g++, NULL); | ||
441 | if (!desc) | ||
442 | continue; | ||
443 | |||
444 | stats->free_inodes += ext4_free_inodes_count(sb, desc); | ||
445 | stats->free_blocks += ext4_free_blks_count(sb, desc); | ||
446 | stats->used_dirs += ext4_used_dirs_count(sb, desc); | ||
447 | } | ||
448 | } | ||
449 | |||
413 | /* | 450 | /* |
414 | * Orlov's allocator for directories. | 451 | * Orlov's allocator for directories. |
415 | * | 452 | * |
@@ -425,35 +462,34 @@ out: | |||
425 | * it has too many directories already (max_dirs) or | 462 | * it has too many directories already (max_dirs) or |
426 | * it has too few free inodes left (min_inodes) or | 463 | * it has too few free inodes left (min_inodes) or |
427 | * it has too few free blocks left (min_blocks) or | 464 | * it has too few free blocks left (min_blocks) or |
428 | * it's already running too large debt (max_debt). | ||
429 | * Parent's group is preferred, if it doesn't satisfy these | 465 | * Parent's group is preferred, if it doesn't satisfy these |
430 | * conditions we search cyclically through the rest. If none | 466 | * conditions we search cyclically through the rest. If none |
431 | * of the groups look good we just look for a group with more | 467 | * of the groups look good we just look for a group with more |
432 | * free inodes than average (starting at parent's group). | 468 | * free inodes than average (starting at parent's group). |
433 | * | ||
434 | * Debt is incremented each time we allocate a directory and decremented | ||
435 | * when we allocate an inode, within 0--255. | ||
436 | */ | 469 | */ |
437 | 470 | ||
438 | #define INODE_COST 64 | ||
439 | #define BLOCK_COST 256 | ||
440 | |||
441 | static int find_group_orlov(struct super_block *sb, struct inode *parent, | 471 | static int find_group_orlov(struct super_block *sb, struct inode *parent, |
442 | ext4_group_t *group) | 472 | ext4_group_t *group, int mode) |
443 | { | 473 | { |
444 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | 474 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
445 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 475 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
446 | struct ext4_super_block *es = sbi->s_es; | ||
447 | ext4_group_t ngroups = sbi->s_groups_count; | 476 | ext4_group_t ngroups = sbi->s_groups_count; |
448 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); | 477 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); |
449 | unsigned int freei, avefreei; | 478 | unsigned int freei, avefreei; |
450 | ext4_fsblk_t freeb, avefreeb; | 479 | ext4_fsblk_t freeb, avefreeb; |
451 | ext4_fsblk_t blocks_per_dir; | ||
452 | unsigned int ndirs; | 480 | unsigned int ndirs; |
453 | int max_debt, max_dirs, min_inodes; | 481 | int max_dirs, min_inodes; |
454 | ext4_grpblk_t min_blocks; | 482 | ext4_grpblk_t min_blocks; |
455 | ext4_group_t i; | 483 | ext4_group_t i, grp, g; |
456 | struct ext4_group_desc *desc; | 484 | struct ext4_group_desc *desc; |
485 | struct orlov_stats stats; | ||
486 | int flex_size = ext4_flex_bg_size(sbi); | ||
487 | |||
488 | if (flex_size > 1) { | ||
489 | ngroups = (ngroups + flex_size - 1) >> | ||
490 | sbi->s_log_groups_per_flex; | ||
491 | parent_group >>= sbi->s_log_groups_per_flex; | ||
492 | } | ||
457 | 493 | ||
458 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); | 494 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); |
459 | avefreei = freei / ngroups; | 495 | avefreei = freei / ngroups; |
@@ -462,71 +498,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
462 | do_div(avefreeb, ngroups); | 498 | do_div(avefreeb, ngroups); |
463 | ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); | 499 | ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); |
464 | 500 | ||
465 | if ((parent == sb->s_root->d_inode) || | 501 | if (S_ISDIR(mode) && |
466 | (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { | 502 | ((parent == sb->s_root->d_inode) || |
503 | (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) { | ||
467 | int best_ndir = inodes_per_group; | 504 | int best_ndir = inodes_per_group; |
468 | ext4_group_t grp; | ||
469 | int ret = -1; | 505 | int ret = -1; |
470 | 506 | ||
471 | get_random_bytes(&grp, sizeof(grp)); | 507 | get_random_bytes(&grp, sizeof(grp)); |
472 | parent_group = (unsigned)grp % ngroups; | 508 | parent_group = (unsigned)grp % ngroups; |
473 | for (i = 0; i < ngroups; i++) { | 509 | for (i = 0; i < ngroups; i++) { |
474 | grp = (parent_group + i) % ngroups; | 510 | g = (parent_group + i) % ngroups; |
475 | desc = ext4_get_group_desc(sb, grp, NULL); | 511 | get_orlov_stats(sb, g, flex_size, &stats); |
476 | if (!desc || !ext4_free_inodes_count(sb, desc)) | 512 | if (!stats.free_inodes) |
477 | continue; | 513 | continue; |
478 | if (ext4_used_dirs_count(sb, desc) >= best_ndir) | 514 | if (stats.used_dirs >= best_ndir) |
479 | continue; | 515 | continue; |
480 | if (ext4_free_inodes_count(sb, desc) < avefreei) | 516 | if (stats.free_inodes < avefreei) |
481 | continue; | 517 | continue; |
482 | if (ext4_free_blks_count(sb, desc) < avefreeb) | 518 | if (stats.free_blocks < avefreeb) |
483 | continue; | 519 | continue; |
484 | *group = grp; | 520 | grp = g; |
485 | ret = 0; | 521 | ret = 0; |
486 | best_ndir = ext4_used_dirs_count(sb, desc); | 522 | best_ndir = stats.used_dirs; |
523 | } | ||
524 | if (ret) | ||
525 | goto fallback; | ||
526 | found_flex_bg: | ||
527 | if (flex_size == 1) { | ||
528 | *group = grp; | ||
529 | return 0; | ||
530 | } | ||
531 | |||
532 | /* | ||
533 | * We pack inodes at the beginning of the flexgroup's | ||
534 | * inode tables. Block allocation decisions will do | ||
535 | * something similar, although regular files will | ||
536 | * start at 2nd block group of the flexgroup. See | ||
537 | * ext4_ext_find_goal() and ext4_find_near(). | ||
538 | */ | ||
539 | grp *= flex_size; | ||
540 | for (i = 0; i < flex_size; i++) { | ||
541 | if (grp+i >= sbi->s_groups_count) | ||
542 | break; | ||
543 | desc = ext4_get_group_desc(sb, grp+i, NULL); | ||
544 | if (desc && ext4_free_inodes_count(sb, desc)) { | ||
545 | *group = grp+i; | ||
546 | return 0; | ||
547 | } | ||
487 | } | 548 | } |
488 | if (ret == 0) | ||
489 | return ret; | ||
490 | goto fallback; | 549 | goto fallback; |
491 | } | 550 | } |
492 | 551 | ||
493 | blocks_per_dir = ext4_blocks_count(es) - freeb; | ||
494 | do_div(blocks_per_dir, ndirs); | ||
495 | |||
496 | max_dirs = ndirs / ngroups + inodes_per_group / 16; | 552 | max_dirs = ndirs / ngroups + inodes_per_group / 16; |
497 | min_inodes = avefreei - inodes_per_group / 4; | 553 | min_inodes = avefreei - inodes_per_group*flex_size / 4; |
498 | min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4; | 554 | if (min_inodes < 1) |
499 | 555 | min_inodes = 1; | |
500 | max_debt = EXT4_BLOCKS_PER_GROUP(sb); | 556 | min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4; |
501 | max_debt /= max_t(int, blocks_per_dir, BLOCK_COST); | 557 | |
502 | if (max_debt * INODE_COST > inodes_per_group) | 558 | /* |
503 | max_debt = inodes_per_group / INODE_COST; | 559 | * Start looking in the flex group where we last allocated an |
504 | if (max_debt > 255) | 560 | * inode for this parent directory |
505 | max_debt = 255; | 561 | */ |
506 | if (max_debt == 0) | 562 | if (EXT4_I(parent)->i_last_alloc_group != ~0) { |
507 | max_debt = 1; | 563 | parent_group = EXT4_I(parent)->i_last_alloc_group; |
564 | if (flex_size > 1) | ||
565 | parent_group >>= sbi->s_log_groups_per_flex; | ||
566 | } | ||
508 | 567 | ||
509 | for (i = 0; i < ngroups; i++) { | 568 | for (i = 0; i < ngroups; i++) { |
510 | *group = (parent_group + i) % ngroups; | 569 | grp = (parent_group + i) % ngroups; |
511 | desc = ext4_get_group_desc(sb, *group, NULL); | 570 | get_orlov_stats(sb, grp, flex_size, &stats); |
512 | if (!desc || !ext4_free_inodes_count(sb, desc)) | 571 | if (stats.used_dirs >= max_dirs) |
513 | continue; | ||
514 | if (ext4_used_dirs_count(sb, desc) >= max_dirs) | ||
515 | continue; | 572 | continue; |
516 | if (ext4_free_inodes_count(sb, desc) < min_inodes) | 573 | if (stats.free_inodes < min_inodes) |
517 | continue; | 574 | continue; |
518 | if (ext4_free_blks_count(sb, desc) < min_blocks) | 575 | if (stats.free_blocks < min_blocks) |
519 | continue; | 576 | continue; |
520 | return 0; | 577 | goto found_flex_bg; |
521 | } | 578 | } |
522 | 579 | ||
523 | fallback: | 580 | fallback: |
581 | ngroups = sbi->s_groups_count; | ||
582 | avefreei = freei / ngroups; | ||
583 | parent_group = EXT4_I(parent)->i_block_group; | ||
524 | for (i = 0; i < ngroups; i++) { | 584 | for (i = 0; i < ngroups; i++) { |
525 | *group = (parent_group + i) % ngroups; | 585 | grp = (parent_group + i) % ngroups; |
526 | desc = ext4_get_group_desc(sb, *group, NULL); | 586 | desc = ext4_get_group_desc(sb, grp, NULL); |
527 | if (desc && ext4_free_inodes_count(sb, desc) && | 587 | if (desc && ext4_free_inodes_count(sb, desc) && |
528 | ext4_free_inodes_count(sb, desc) >= avefreei) | 588 | ext4_free_inodes_count(sb, desc) >= avefreei) { |
589 | *group = grp; | ||
529 | return 0; | 590 | return 0; |
591 | } | ||
530 | } | 592 | } |
531 | 593 | ||
532 | if (avefreei) { | 594 | if (avefreei) { |
@@ -542,12 +604,51 @@ fallback: | |||
542 | } | 604 | } |
543 | 605 | ||
544 | static int find_group_other(struct super_block *sb, struct inode *parent, | 606 | static int find_group_other(struct super_block *sb, struct inode *parent, |
545 | ext4_group_t *group) | 607 | ext4_group_t *group, int mode) |
546 | { | 608 | { |
547 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | 609 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
548 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | 610 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; |
549 | struct ext4_group_desc *desc; | 611 | struct ext4_group_desc *desc; |
550 | ext4_group_t i; | 612 | ext4_group_t i, last; |
613 | int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); | ||
614 | |||
615 | /* | ||
616 | * Try to place the inode in the same flex group as its | ||
617 | * parent. If we can't find space, use the Orlov algorithm to | ||
618 | * find another flex group, and store that information in the | ||
619 | * parent directory's inode information so that we use that flex | ||
620 | * group for future allocations. | ||
621 | */ | ||
622 | if (flex_size > 1) { | ||
623 | int retry = 0; | ||
624 | |||
625 | try_again: | ||
626 | parent_group &= ~(flex_size-1); | ||
627 | last = parent_group + flex_size; | ||
628 | if (last > ngroups) | ||
629 | last = ngroups; | ||
630 | for (i = parent_group; i < last; i++) { | ||
631 | desc = ext4_get_group_desc(sb, i, NULL); | ||
632 | if (desc && ext4_free_inodes_count(sb, desc)) { | ||
633 | *group = i; | ||
634 | return 0; | ||
635 | } | ||
636 | } | ||
637 | if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) { | ||
638 | retry = 1; | ||
639 | parent_group = EXT4_I(parent)->i_last_alloc_group; | ||
640 | goto try_again; | ||
641 | } | ||
642 | /* | ||
643 | * If this didn't work, use the Orlov search algorithm | ||
644 | * to find a new flex group; we pass in the mode to | ||
645 | * avoid the topdir algorithms. | ||
646 | */ | ||
647 | *group = parent_group + flex_size; | ||
648 | if (*group > ngroups) | ||
649 | *group = 0; | ||
650 | return find_group_orlov(sb, parent, group, mode); | ||
651 | } | ||
551 | 652 | ||
552 | /* | 653 | /* |
553 | * Try to place the inode in its parent directory | 654 | * Try to place the inode in its parent directory |
@@ -716,10 +817,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
716 | sbi = EXT4_SB(sb); | 817 | sbi = EXT4_SB(sb); |
717 | es = sbi->s_es; | 818 | es = sbi->s_es; |
718 | 819 | ||
719 | if (sbi->s_log_groups_per_flex) { | 820 | if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { |
720 | ret2 = find_group_flex(sb, dir, &group); | 821 | ret2 = find_group_flex(sb, dir, &group); |
721 | if (ret2 == -1) { | 822 | if (ret2 == -1) { |
722 | ret2 = find_group_other(sb, dir, &group); | 823 | ret2 = find_group_other(sb, dir, &group, mode); |
723 | if (ret2 == 0 && once) | 824 | if (ret2 == 0 && once) |
724 | once = 0; | 825 | once = 0; |
725 | printk(KERN_NOTICE "ext4: find_group_flex " | 826 | printk(KERN_NOTICE "ext4: find_group_flex " |
@@ -733,11 +834,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
733 | if (test_opt(sb, OLDALLOC)) | 834 | if (test_opt(sb, OLDALLOC)) |
734 | ret2 = find_group_dir(sb, dir, &group); | 835 | ret2 = find_group_dir(sb, dir, &group); |
735 | else | 836 | else |
736 | ret2 = find_group_orlov(sb, dir, &group); | 837 | ret2 = find_group_orlov(sb, dir, &group, mode); |
737 | } else | 838 | } else |
738 | ret2 = find_group_other(sb, dir, &group); | 839 | ret2 = find_group_other(sb, dir, &group, mode); |
739 | 840 | ||
740 | got_group: | 841 | got_group: |
842 | EXT4_I(dir)->i_last_alloc_group = group; | ||
741 | err = -ENOSPC; | 843 | err = -ENOSPC; |
742 | if (ret2 == -1) | 844 | if (ret2 == -1) |
743 | goto out; | 845 | goto out; |
@@ -894,6 +996,7 @@ got: | |||
894 | ei->i_file_acl = 0; | 996 | ei->i_file_acl = 0; |
895 | ei->i_dtime = 0; | 997 | ei->i_dtime = 0; |
896 | ei->i_block_group = group; | 998 | ei->i_block_group = group; |
999 | ei->i_last_alloc_group = ~0; | ||
897 | 1000 | ||
898 | ext4_set_inode_flags(inode); | 1001 | ext4_set_inode_flags(inode); |
899 | if (IS_DIRSYNC(inode)) | 1002 | if (IS_DIRSYNC(inode)) |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 71d3ecd5db79..25811507d2b0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -459,6 +459,8 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
459 | ext4_fsblk_t bg_start; | 459 | ext4_fsblk_t bg_start; |
460 | ext4_fsblk_t last_block; | 460 | ext4_fsblk_t last_block; |
461 | ext4_grpblk_t colour; | 461 | ext4_grpblk_t colour; |
462 | ext4_group_t block_group; | ||
463 | int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); | ||
462 | 464 | ||
463 | /* Try to find previous block */ | 465 | /* Try to find previous block */ |
464 | for (p = ind->p - 1; p >= start; p--) { | 466 | for (p = ind->p - 1; p >= start; p--) { |
@@ -474,9 +476,22 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
474 | * It is going to be referred to from the inode itself? OK, just put it | 476 | * It is going to be referred to from the inode itself? OK, just put it |
475 | * into the same cylinder group then. | 477 | * into the same cylinder group then. |
476 | */ | 478 | */ |
477 | bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group); | 479 | block_group = ei->i_block_group; |
480 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { | ||
481 | block_group &= ~(flex_size-1); | ||
482 | if (S_ISREG(inode->i_mode)) | ||
483 | block_group++; | ||
484 | } | ||
485 | bg_start = ext4_group_first_block_no(inode->i_sb, block_group); | ||
478 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; | 486 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; |
479 | 487 | ||
488 | /* | ||
489 | * If we are doing delayed allocation, we don't need to take | ||
490 | * colour into account. | ||
491 | */ | ||
492 | if (test_opt(inode->i_sb, DELALLOC)) | ||
493 | return bg_start; | ||
494 | |||
480 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) | 495 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) |
481 | colour = (current->pid % 16) * | 496 | colour = (current->pid % 16) * |
482 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); | 497 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); |
@@ -4287,6 +4302,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4287 | ei->i_disksize = inode->i_size; | 4302 | ei->i_disksize = inode->i_size; |
4288 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); | 4303 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); |
4289 | ei->i_block_group = iloc.block_group; | 4304 | ei->i_block_group = iloc.block_group; |
4305 | ei->i_last_alloc_group = ~0; | ||
4290 | /* | 4306 | /* |
4291 | * NOTE! The in-memory inode i_data array is in little-endian order | 4307 | * NOTE! The in-memory inode i_data array is in little-endian order |
4292 | * even on big-endian machines: we do NOT byteswap the block numbers! | 4308 | * even on big-endian machines: we do NOT byteswap the block numbers! |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b038188bd039..b0d6022eaa67 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -1726,6 +1726,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1726 | { | 1726 | { |
1727 | unsigned free, fragments; | 1727 | unsigned free, fragments; |
1728 | unsigned i, bits; | 1728 | unsigned i, bits; |
1729 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); | ||
1729 | struct ext4_group_desc *desc; | 1730 | struct ext4_group_desc *desc; |
1730 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); | 1731 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
1731 | 1732 | ||
@@ -1747,6 +1748,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1747 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) | 1748 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) |
1748 | return 0; | 1749 | return 0; |
1749 | 1750 | ||
1751 | /* Avoid using the first bg of a flexgroup for data files */ | ||
1752 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && | ||
1753 | (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && | ||
1754 | ((group % flex_size) == 0)) | ||
1755 | return 0; | ||
1756 | |||
1750 | bits = ac->ac_sb->s_blocksize_bits + 1; | 1757 | bits = ac->ac_sb->s_blocksize_bits + 1; |
1751 | for (i = ac->ac_2order; i <= bits; i++) | 1758 | for (i = ac->ac_2order; i <= bits; i++) |
1752 | if (grp->bb_counters[i] > 0) | 1759 | if (grp->bb_counters[i] > 0) |