aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/ialloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r--fs/ext4/ialloc.c215
1 files changed, 159 insertions, 56 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ae3eb57dccdd..617f5a2d800a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -410,6 +410,43 @@ out:
410 return 0; 410 return 0;
411} 411}
412 412
413struct orlov_stats {
414 __u32 free_inodes;
415 __u32 free_blocks;
416 __u32 used_dirs;
417};
418
419/*
420 * Helper function for Orlov's allocator; returns critical information
421 * for a particular block group or flex_bg. If flex_size is 1, then g
422 * is a block group number; otherwise it is flex_bg number.
423 */
424void get_orlov_stats(struct super_block *sb, ext4_group_t g,
425 int flex_size, struct orlov_stats *stats)
426{
427 struct ext4_group_desc *desc;
428 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
429 int i;
430
431 stats->free_inodes = 0;
432 stats->free_blocks = 0;
433 stats->used_dirs = 0;
434
435 g *= flex_size;
436
437 for (i = 0; i < flex_size; i++) {
438 if (g >= ngroups)
439 break;
440 desc = ext4_get_group_desc(sb, g++, NULL);
441 if (!desc)
442 continue;
443
444 stats->free_inodes += ext4_free_inodes_count(sb, desc);
445 stats->free_blocks += ext4_free_blks_count(sb, desc);
446 stats->used_dirs += ext4_used_dirs_count(sb, desc);
447 }
448}
449
413/* 450/*
414 * Orlov's allocator for directories. 451 * Orlov's allocator for directories.
415 * 452 *
@@ -425,35 +462,34 @@ out:
425 * it has too many directories already (max_dirs) or 462 * it has too many directories already (max_dirs) or
426 * it has too few free inodes left (min_inodes) or 463 * it has too few free inodes left (min_inodes) or
427 * it has too few free blocks left (min_blocks) or 464 * it has too few free blocks left (min_blocks) or
428 * it's already running too large debt (max_debt).
429 * Parent's group is preferred, if it doesn't satisfy these 465 * Parent's group is preferred, if it doesn't satisfy these
430 * conditions we search cyclically through the rest. If none 466 * conditions we search cyclically through the rest. If none
431 * of the groups look good we just look for a group with more 467 * of the groups look good we just look for a group with more
432 * free inodes than average (starting at parent's group). 468 * free inodes than average (starting at parent's group).
433 *
434 * Debt is incremented each time we allocate a directory and decremented
435 * when we allocate an inode, within 0--255.
436 */ 469 */
437 470
438#define INODE_COST 64
439#define BLOCK_COST 256
440
441static int find_group_orlov(struct super_block *sb, struct inode *parent, 471static int find_group_orlov(struct super_block *sb, struct inode *parent,
442 ext4_group_t *group) 472 ext4_group_t *group, int mode)
443{ 473{
444 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 474 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
445 struct ext4_sb_info *sbi = EXT4_SB(sb); 475 struct ext4_sb_info *sbi = EXT4_SB(sb);
446 struct ext4_super_block *es = sbi->s_es;
447 ext4_group_t ngroups = sbi->s_groups_count; 476 ext4_group_t ngroups = sbi->s_groups_count;
448 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 477 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
449 unsigned int freei, avefreei; 478 unsigned int freei, avefreei;
450 ext4_fsblk_t freeb, avefreeb; 479 ext4_fsblk_t freeb, avefreeb;
451 ext4_fsblk_t blocks_per_dir;
452 unsigned int ndirs; 480 unsigned int ndirs;
453 int max_debt, max_dirs, min_inodes; 481 int max_dirs, min_inodes;
454 ext4_grpblk_t min_blocks; 482 ext4_grpblk_t min_blocks;
455 ext4_group_t i; 483 ext4_group_t i, grp, g;
456 struct ext4_group_desc *desc; 484 struct ext4_group_desc *desc;
485 struct orlov_stats stats;
486 int flex_size = ext4_flex_bg_size(sbi);
487
488 if (flex_size > 1) {
489 ngroups = (ngroups + flex_size - 1) >>
490 sbi->s_log_groups_per_flex;
491 parent_group >>= sbi->s_log_groups_per_flex;
492 }
457 493
458 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); 494 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
459 avefreei = freei / ngroups; 495 avefreei = freei / ngroups;
@@ -462,71 +498,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
462 do_div(avefreeb, ngroups); 498 do_div(avefreeb, ngroups);
463 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); 499 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
464 500
465 if ((parent == sb->s_root->d_inode) || 501 if (S_ISDIR(mode) &&
466 (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { 502 ((parent == sb->s_root->d_inode) ||
503 (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
467 int best_ndir = inodes_per_group; 504 int best_ndir = inodes_per_group;
468 ext4_group_t grp;
469 int ret = -1; 505 int ret = -1;
470 506
471 get_random_bytes(&grp, sizeof(grp)); 507 get_random_bytes(&grp, sizeof(grp));
472 parent_group = (unsigned)grp % ngroups; 508 parent_group = (unsigned)grp % ngroups;
473 for (i = 0; i < ngroups; i++) { 509 for (i = 0; i < ngroups; i++) {
474 grp = (parent_group + i) % ngroups; 510 g = (parent_group + i) % ngroups;
475 desc = ext4_get_group_desc(sb, grp, NULL); 511 get_orlov_stats(sb, g, flex_size, &stats);
476 if (!desc || !ext4_free_inodes_count(sb, desc)) 512 if (!stats.free_inodes)
477 continue; 513 continue;
478 if (ext4_used_dirs_count(sb, desc) >= best_ndir) 514 if (stats.used_dirs >= best_ndir)
479 continue; 515 continue;
480 if (ext4_free_inodes_count(sb, desc) < avefreei) 516 if (stats.free_inodes < avefreei)
481 continue; 517 continue;
482 if (ext4_free_blks_count(sb, desc) < avefreeb) 518 if (stats.free_blocks < avefreeb)
483 continue; 519 continue;
484 *group = grp; 520 grp = g;
485 ret = 0; 521 ret = 0;
486 best_ndir = ext4_used_dirs_count(sb, desc); 522 best_ndir = stats.used_dirs;
523 }
524 if (ret)
525 goto fallback;
526 found_flex_bg:
527 if (flex_size == 1) {
528 *group = grp;
529 return 0;
530 }
531
532 /*
533 * We pack inodes at the beginning of the flexgroup's
534 * inode tables. Block allocation decisions will do
535 * something similar, although regular files will
536 * start at 2nd block group of the flexgroup. See
537 * ext4_ext_find_goal() and ext4_find_near().
538 */
539 grp *= flex_size;
540 for (i = 0; i < flex_size; i++) {
541 if (grp+i >= sbi->s_groups_count)
542 break;
543 desc = ext4_get_group_desc(sb, grp+i, NULL);
544 if (desc && ext4_free_inodes_count(sb, desc)) {
545 *group = grp+i;
546 return 0;
547 }
487 } 548 }
488 if (ret == 0)
489 return ret;
490 goto fallback; 549 goto fallback;
491 } 550 }
492 551
493 blocks_per_dir = ext4_blocks_count(es) - freeb;
494 do_div(blocks_per_dir, ndirs);
495
496 max_dirs = ndirs / ngroups + inodes_per_group / 16; 552 max_dirs = ndirs / ngroups + inodes_per_group / 16;
497 min_inodes = avefreei - inodes_per_group / 4; 553 min_inodes = avefreei - inodes_per_group*flex_size / 4;
498 min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4; 554 if (min_inodes < 1)
499 555 min_inodes = 1;
500 max_debt = EXT4_BLOCKS_PER_GROUP(sb); 556 min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4;
501 max_debt /= max_t(int, blocks_per_dir, BLOCK_COST); 557
502 if (max_debt * INODE_COST > inodes_per_group) 558 /*
503 max_debt = inodes_per_group / INODE_COST; 559 * Start looking in the flex group where we last allocated an
504 if (max_debt > 255) 560 * inode for this parent directory
505 max_debt = 255; 561 */
506 if (max_debt == 0) 562 if (EXT4_I(parent)->i_last_alloc_group != ~0) {
507 max_debt = 1; 563 parent_group = EXT4_I(parent)->i_last_alloc_group;
564 if (flex_size > 1)
565 parent_group >>= sbi->s_log_groups_per_flex;
566 }
508 567
509 for (i = 0; i < ngroups; i++) { 568 for (i = 0; i < ngroups; i++) {
510 *group = (parent_group + i) % ngroups; 569 grp = (parent_group + i) % ngroups;
511 desc = ext4_get_group_desc(sb, *group, NULL); 570 get_orlov_stats(sb, grp, flex_size, &stats);
512 if (!desc || !ext4_free_inodes_count(sb, desc)) 571 if (stats.used_dirs >= max_dirs)
513 continue;
514 if (ext4_used_dirs_count(sb, desc) >= max_dirs)
515 continue; 572 continue;
516 if (ext4_free_inodes_count(sb, desc) < min_inodes) 573 if (stats.free_inodes < min_inodes)
517 continue; 574 continue;
518 if (ext4_free_blks_count(sb, desc) < min_blocks) 575 if (stats.free_blocks < min_blocks)
519 continue; 576 continue;
520 return 0; 577 goto found_flex_bg;
521 } 578 }
522 579
523fallback: 580fallback:
581 ngroups = sbi->s_groups_count;
582 avefreei = freei / ngroups;
583 parent_group = EXT4_I(parent)->i_block_group;
524 for (i = 0; i < ngroups; i++) { 584 for (i = 0; i < ngroups; i++) {
525 *group = (parent_group + i) % ngroups; 585 grp = (parent_group + i) % ngroups;
526 desc = ext4_get_group_desc(sb, *group, NULL); 586 desc = ext4_get_group_desc(sb, grp, NULL);
527 if (desc && ext4_free_inodes_count(sb, desc) && 587 if (desc && ext4_free_inodes_count(sb, desc) &&
528 ext4_free_inodes_count(sb, desc) >= avefreei) 588 ext4_free_inodes_count(sb, desc) >= avefreei) {
589 *group = grp;
529 return 0; 590 return 0;
591 }
530 } 592 }
531 593
532 if (avefreei) { 594 if (avefreei) {
@@ -542,12 +604,51 @@ fallback:
542} 604}
543 605
544static int find_group_other(struct super_block *sb, struct inode *parent, 606static int find_group_other(struct super_block *sb, struct inode *parent,
545 ext4_group_t *group) 607 ext4_group_t *group, int mode)
546{ 608{
547 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 609 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
548 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 610 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
549 struct ext4_group_desc *desc; 611 struct ext4_group_desc *desc;
550 ext4_group_t i; 612 ext4_group_t i, last;
613 int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
614
615 /*
616 * Try to place the inode is the same flex group as its
617 * parent. If we can't find space, use the Orlov algorithm to
618 * find another flex group, and store that information in the
619 * parent directory's inode information so that use that flex
620 * group for future allocations.
621 */
622 if (flex_size > 1) {
623 int retry = 0;
624
625 try_again:
626 parent_group &= ~(flex_size-1);
627 last = parent_group + flex_size;
628 if (last > ngroups)
629 last = ngroups;
630 for (i = parent_group; i < last; i++) {
631 desc = ext4_get_group_desc(sb, i, NULL);
632 if (desc && ext4_free_inodes_count(sb, desc)) {
633 *group = i;
634 return 0;
635 }
636 }
637 if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
638 retry = 1;
639 parent_group = EXT4_I(parent)->i_last_alloc_group;
640 goto try_again;
641 }
642 /*
643 * If this didn't work, use the Orlov search algorithm
644 * to find a new flex group; we pass in the mode to
645 * avoid the topdir algorithms.
646 */
647 *group = parent_group + flex_size;
648 if (*group > ngroups)
649 *group = 0;
650 return find_group_orlov(sb, parent, group, mode);
651 }
551 652
552 /* 653 /*
553 * Try to place the inode in its parent directory 654 * Try to place the inode in its parent directory
@@ -716,10 +817,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
716 sbi = EXT4_SB(sb); 817 sbi = EXT4_SB(sb);
717 es = sbi->s_es; 818 es = sbi->s_es;
718 819
719 if (sbi->s_log_groups_per_flex) { 820 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
720 ret2 = find_group_flex(sb, dir, &group); 821 ret2 = find_group_flex(sb, dir, &group);
721 if (ret2 == -1) { 822 if (ret2 == -1) {
722 ret2 = find_group_other(sb, dir, &group); 823 ret2 = find_group_other(sb, dir, &group, mode);
723 if (ret2 == 0 && once) 824 if (ret2 == 0 && once)
724 once = 0; 825 once = 0;
725 printk(KERN_NOTICE "ext4: find_group_flex " 826 printk(KERN_NOTICE "ext4: find_group_flex "
@@ -733,11 +834,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
733 if (test_opt(sb, OLDALLOC)) 834 if (test_opt(sb, OLDALLOC))
734 ret2 = find_group_dir(sb, dir, &group); 835 ret2 = find_group_dir(sb, dir, &group);
735 else 836 else
736 ret2 = find_group_orlov(sb, dir, &group); 837 ret2 = find_group_orlov(sb, dir, &group, mode);
737 } else 838 } else
738 ret2 = find_group_other(sb, dir, &group); 839 ret2 = find_group_other(sb, dir, &group, mode);
739 840
740got_group: 841got_group:
842 EXT4_I(dir)->i_last_alloc_group = group;
741 err = -ENOSPC; 843 err = -ENOSPC;
742 if (ret2 == -1) 844 if (ret2 == -1)
743 goto out; 845 goto out;
@@ -894,6 +996,7 @@ got:
894 ei->i_file_acl = 0; 996 ei->i_file_acl = 0;
895 ei->i_dtime = 0; 997 ei->i_dtime = 0;
896 ei->i_block_group = group; 998 ei->i_block_group = group;
999 ei->i_last_alloc_group = ~0;
897 1000
898 ext4_set_inode_flags(inode); 1001 ext4_set_inode_flags(inode);
899 if (IS_DIRSYNC(inode)) 1002 if (IS_DIRSYNC(inode))