diff options
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r-- | fs/ext4/ialloc.c | 215 |
1 files changed, 159 insertions, 56 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index ae3eb57dccdd..617f5a2d800a 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -410,6 +410,43 @@ out: | |||
410 | return 0; | 410 | return 0; |
411 | } | 411 | } |
412 | 412 | ||
413 | struct orlov_stats { | ||
414 | __u32 free_inodes; | ||
415 | __u32 free_blocks; | ||
416 | __u32 used_dirs; | ||
417 | }; | ||
418 | |||
419 | /* | ||
420 | * Helper function for Orlov's allocator; returns critical information | ||
421 | * for a particular block group or flex_bg. If flex_size is 1, then g | ||
422 | * is a block group number; otherwise it is flex_bg number. | ||
423 | */ | ||
424 | void get_orlov_stats(struct super_block *sb, ext4_group_t g, | ||
425 | int flex_size, struct orlov_stats *stats) | ||
426 | { | ||
427 | struct ext4_group_desc *desc; | ||
428 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | ||
429 | int i; | ||
430 | |||
431 | stats->free_inodes = 0; | ||
432 | stats->free_blocks = 0; | ||
433 | stats->used_dirs = 0; | ||
434 | |||
435 | g *= flex_size; | ||
436 | |||
437 | for (i = 0; i < flex_size; i++) { | ||
438 | if (g >= ngroups) | ||
439 | break; | ||
440 | desc = ext4_get_group_desc(sb, g++, NULL); | ||
441 | if (!desc) | ||
442 | continue; | ||
443 | |||
444 | stats->free_inodes += ext4_free_inodes_count(sb, desc); | ||
445 | stats->free_blocks += ext4_free_blks_count(sb, desc); | ||
446 | stats->used_dirs += ext4_used_dirs_count(sb, desc); | ||
447 | } | ||
448 | } | ||
449 | |||
413 | /* | 450 | /* |
414 | * Orlov's allocator for directories. | 451 | * Orlov's allocator for directories. |
415 | * | 452 | * |
@@ -425,35 +462,34 @@ out: | |||
425 | * it has too many directories already (max_dirs) or | 462 | * it has too many directories already (max_dirs) or |
426 | * it has too few free inodes left (min_inodes) or | 463 | * it has too few free inodes left (min_inodes) or |
427 | * it has too few free blocks left (min_blocks) or | 464 | * it has too few free blocks left (min_blocks) or |
428 | * it's already running too large debt (max_debt). | ||
429 | * Parent's group is preferred, if it doesn't satisfy these | 465 | * Parent's group is preferred, if it doesn't satisfy these |
430 | * conditions we search cyclically through the rest. If none | 466 | * conditions we search cyclically through the rest. If none |
431 | * of the groups look good we just look for a group with more | 467 | * of the groups look good we just look for a group with more |
432 | * free inodes than average (starting at parent's group). | 468 | * free inodes than average (starting at parent's group). |
433 | * | ||
434 | * Debt is incremented each time we allocate a directory and decremented | ||
435 | * when we allocate an inode, within 0--255. | ||
436 | */ | 469 | */ |
437 | 470 | ||
438 | #define INODE_COST 64 | ||
439 | #define BLOCK_COST 256 | ||
440 | |||
441 | static int find_group_orlov(struct super_block *sb, struct inode *parent, | 471 | static int find_group_orlov(struct super_block *sb, struct inode *parent, |
442 | ext4_group_t *group) | 472 | ext4_group_t *group, int mode) |
443 | { | 473 | { |
444 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | 474 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
445 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 475 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
446 | struct ext4_super_block *es = sbi->s_es; | ||
447 | ext4_group_t ngroups = sbi->s_groups_count; | 476 | ext4_group_t ngroups = sbi->s_groups_count; |
448 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); | 477 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); |
449 | unsigned int freei, avefreei; | 478 | unsigned int freei, avefreei; |
450 | ext4_fsblk_t freeb, avefreeb; | 479 | ext4_fsblk_t freeb, avefreeb; |
451 | ext4_fsblk_t blocks_per_dir; | ||
452 | unsigned int ndirs; | 480 | unsigned int ndirs; |
453 | int max_debt, max_dirs, min_inodes; | 481 | int max_dirs, min_inodes; |
454 | ext4_grpblk_t min_blocks; | 482 | ext4_grpblk_t min_blocks; |
455 | ext4_group_t i; | 483 | ext4_group_t i, grp, g; |
456 | struct ext4_group_desc *desc; | 484 | struct ext4_group_desc *desc; |
485 | struct orlov_stats stats; | ||
486 | int flex_size = ext4_flex_bg_size(sbi); | ||
487 | |||
488 | if (flex_size > 1) { | ||
489 | ngroups = (ngroups + flex_size - 1) >> | ||
490 | sbi->s_log_groups_per_flex; | ||
491 | parent_group >>= sbi->s_log_groups_per_flex; | ||
492 | } | ||
457 | 493 | ||
458 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); | 494 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); |
459 | avefreei = freei / ngroups; | 495 | avefreei = freei / ngroups; |
@@ -462,71 +498,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
462 | do_div(avefreeb, ngroups); | 498 | do_div(avefreeb, ngroups); |
463 | ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); | 499 | ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); |
464 | 500 | ||
465 | if ((parent == sb->s_root->d_inode) || | 501 | if (S_ISDIR(mode) && |
466 | (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { | 502 | ((parent == sb->s_root->d_inode) || |
503 | (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) { | ||
467 | int best_ndir = inodes_per_group; | 504 | int best_ndir = inodes_per_group; |
468 | ext4_group_t grp; | ||
469 | int ret = -1; | 505 | int ret = -1; |
470 | 506 | ||
471 | get_random_bytes(&grp, sizeof(grp)); | 507 | get_random_bytes(&grp, sizeof(grp)); |
472 | parent_group = (unsigned)grp % ngroups; | 508 | parent_group = (unsigned)grp % ngroups; |
473 | for (i = 0; i < ngroups; i++) { | 509 | for (i = 0; i < ngroups; i++) { |
474 | grp = (parent_group + i) % ngroups; | 510 | g = (parent_group + i) % ngroups; |
475 | desc = ext4_get_group_desc(sb, grp, NULL); | 511 | get_orlov_stats(sb, g, flex_size, &stats); |
476 | if (!desc || !ext4_free_inodes_count(sb, desc)) | 512 | if (!stats.free_inodes) |
477 | continue; | 513 | continue; |
478 | if (ext4_used_dirs_count(sb, desc) >= best_ndir) | 514 | if (stats.used_dirs >= best_ndir) |
479 | continue; | 515 | continue; |
480 | if (ext4_free_inodes_count(sb, desc) < avefreei) | 516 | if (stats.free_inodes < avefreei) |
481 | continue; | 517 | continue; |
482 | if (ext4_free_blks_count(sb, desc) < avefreeb) | 518 | if (stats.free_blocks < avefreeb) |
483 | continue; | 519 | continue; |
484 | *group = grp; | 520 | grp = g; |
485 | ret = 0; | 521 | ret = 0; |
486 | best_ndir = ext4_used_dirs_count(sb, desc); | 522 | best_ndir = stats.used_dirs; |
523 | } | ||
524 | if (ret) | ||
525 | goto fallback; | ||
526 | found_flex_bg: | ||
527 | if (flex_size == 1) { | ||
528 | *group = grp; | ||
529 | return 0; | ||
530 | } | ||
531 | |||
532 | /* | ||
533 | * We pack inodes at the beginning of the flexgroup's | ||
534 | * inode tables. Block allocation decisions will do | ||
535 | * something similar, although regular files will | ||
536 | * start at 2nd block group of the flexgroup. See | ||
537 | * ext4_ext_find_goal() and ext4_find_near(). | ||
538 | */ | ||
539 | grp *= flex_size; | ||
540 | for (i = 0; i < flex_size; i++) { | ||
541 | if (grp+i >= sbi->s_groups_count) | ||
542 | break; | ||
543 | desc = ext4_get_group_desc(sb, grp+i, NULL); | ||
544 | if (desc && ext4_free_inodes_count(sb, desc)) { | ||
545 | *group = grp+i; | ||
546 | return 0; | ||
547 | } | ||
487 | } | 548 | } |
488 | if (ret == 0) | ||
489 | return ret; | ||
490 | goto fallback; | 549 | goto fallback; |
491 | } | 550 | } |
492 | 551 | ||
493 | blocks_per_dir = ext4_blocks_count(es) - freeb; | ||
494 | do_div(blocks_per_dir, ndirs); | ||
495 | |||
496 | max_dirs = ndirs / ngroups + inodes_per_group / 16; | 552 | max_dirs = ndirs / ngroups + inodes_per_group / 16; |
497 | min_inodes = avefreei - inodes_per_group / 4; | 553 | min_inodes = avefreei - inodes_per_group*flex_size / 4; |
498 | min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4; | 554 | if (min_inodes < 1) |
499 | 555 | min_inodes = 1; | |
500 | max_debt = EXT4_BLOCKS_PER_GROUP(sb); | 556 | min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4; |
501 | max_debt /= max_t(int, blocks_per_dir, BLOCK_COST); | 557 | |
502 | if (max_debt * INODE_COST > inodes_per_group) | 558 | /* |
503 | max_debt = inodes_per_group / INODE_COST; | 559 | * Start looking in the flex group where we last allocated an |
504 | if (max_debt > 255) | 560 | * inode for this parent directory |
505 | max_debt = 255; | 561 | */ |
506 | if (max_debt == 0) | 562 | if (EXT4_I(parent)->i_last_alloc_group != ~0) { |
507 | max_debt = 1; | 563 | parent_group = EXT4_I(parent)->i_last_alloc_group; |
564 | if (flex_size > 1) | ||
565 | parent_group >>= sbi->s_log_groups_per_flex; | ||
566 | } | ||
508 | 567 | ||
509 | for (i = 0; i < ngroups; i++) { | 568 | for (i = 0; i < ngroups; i++) { |
510 | *group = (parent_group + i) % ngroups; | 569 | grp = (parent_group + i) % ngroups; |
511 | desc = ext4_get_group_desc(sb, *group, NULL); | 570 | get_orlov_stats(sb, grp, flex_size, &stats); |
512 | if (!desc || !ext4_free_inodes_count(sb, desc)) | 571 | if (stats.used_dirs >= max_dirs) |
513 | continue; | ||
514 | if (ext4_used_dirs_count(sb, desc) >= max_dirs) | ||
515 | continue; | 572 | continue; |
516 | if (ext4_free_inodes_count(sb, desc) < min_inodes) | 573 | if (stats.free_inodes < min_inodes) |
517 | continue; | 574 | continue; |
518 | if (ext4_free_blks_count(sb, desc) < min_blocks) | 575 | if (stats.free_blocks < min_blocks) |
519 | continue; | 576 | continue; |
520 | return 0; | 577 | goto found_flex_bg; |
521 | } | 578 | } |
522 | 579 | ||
523 | fallback: | 580 | fallback: |
581 | ngroups = sbi->s_groups_count; | ||
582 | avefreei = freei / ngroups; | ||
583 | parent_group = EXT4_I(parent)->i_block_group; | ||
524 | for (i = 0; i < ngroups; i++) { | 584 | for (i = 0; i < ngroups; i++) { |
525 | *group = (parent_group + i) % ngroups; | 585 | grp = (parent_group + i) % ngroups; |
526 | desc = ext4_get_group_desc(sb, *group, NULL); | 586 | desc = ext4_get_group_desc(sb, grp, NULL); |
527 | if (desc && ext4_free_inodes_count(sb, desc) && | 587 | if (desc && ext4_free_inodes_count(sb, desc) && |
528 | ext4_free_inodes_count(sb, desc) >= avefreei) | 588 | ext4_free_inodes_count(sb, desc) >= avefreei) { |
589 | *group = grp; | ||
529 | return 0; | 590 | return 0; |
591 | } | ||
530 | } | 592 | } |
531 | 593 | ||
532 | if (avefreei) { | 594 | if (avefreei) { |
@@ -542,12 +604,51 @@ fallback: | |||
542 | } | 604 | } |
543 | 605 | ||
544 | static int find_group_other(struct super_block *sb, struct inode *parent, | 606 | static int find_group_other(struct super_block *sb, struct inode *parent, |
545 | ext4_group_t *group) | 607 | ext4_group_t *group, int mode) |
546 | { | 608 | { |
547 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | 609 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; |
548 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | 610 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; |
549 | struct ext4_group_desc *desc; | 611 | struct ext4_group_desc *desc; |
550 | ext4_group_t i; | 612 | ext4_group_t i, last; |
613 | int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); | ||
614 | |||
615 | /* | ||
616 | * Try to place the inode is the same flex group as its | ||
617 | * parent. If we can't find space, use the Orlov algorithm to | ||
618 | * find another flex group, and store that information in the | ||
619 | * parent directory's inode information so that use that flex | ||
620 | * group for future allocations. | ||
621 | */ | ||
622 | if (flex_size > 1) { | ||
623 | int retry = 0; | ||
624 | |||
625 | try_again: | ||
626 | parent_group &= ~(flex_size-1); | ||
627 | last = parent_group + flex_size; | ||
628 | if (last > ngroups) | ||
629 | last = ngroups; | ||
630 | for (i = parent_group; i < last; i++) { | ||
631 | desc = ext4_get_group_desc(sb, i, NULL); | ||
632 | if (desc && ext4_free_inodes_count(sb, desc)) { | ||
633 | *group = i; | ||
634 | return 0; | ||
635 | } | ||
636 | } | ||
637 | if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) { | ||
638 | retry = 1; | ||
639 | parent_group = EXT4_I(parent)->i_last_alloc_group; | ||
640 | goto try_again; | ||
641 | } | ||
642 | /* | ||
643 | * If this didn't work, use the Orlov search algorithm | ||
644 | * to find a new flex group; we pass in the mode to | ||
645 | * avoid the topdir algorithms. | ||
646 | */ | ||
647 | *group = parent_group + flex_size; | ||
648 | if (*group > ngroups) | ||
649 | *group = 0; | ||
650 | return find_group_orlov(sb, parent, group, mode); | ||
651 | } | ||
551 | 652 | ||
552 | /* | 653 | /* |
553 | * Try to place the inode in its parent directory | 654 | * Try to place the inode in its parent directory |
@@ -716,10 +817,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
716 | sbi = EXT4_SB(sb); | 817 | sbi = EXT4_SB(sb); |
717 | es = sbi->s_es; | 818 | es = sbi->s_es; |
718 | 819 | ||
719 | if (sbi->s_log_groups_per_flex) { | 820 | if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { |
720 | ret2 = find_group_flex(sb, dir, &group); | 821 | ret2 = find_group_flex(sb, dir, &group); |
721 | if (ret2 == -1) { | 822 | if (ret2 == -1) { |
722 | ret2 = find_group_other(sb, dir, &group); | 823 | ret2 = find_group_other(sb, dir, &group, mode); |
723 | if (ret2 == 0 && once) | 824 | if (ret2 == 0 && once) |
724 | once = 0; | 825 | once = 0; |
725 | printk(KERN_NOTICE "ext4: find_group_flex " | 826 | printk(KERN_NOTICE "ext4: find_group_flex " |
@@ -733,11 +834,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
733 | if (test_opt(sb, OLDALLOC)) | 834 | if (test_opt(sb, OLDALLOC)) |
734 | ret2 = find_group_dir(sb, dir, &group); | 835 | ret2 = find_group_dir(sb, dir, &group); |
735 | else | 836 | else |
736 | ret2 = find_group_orlov(sb, dir, &group); | 837 | ret2 = find_group_orlov(sb, dir, &group, mode); |
737 | } else | 838 | } else |
738 | ret2 = find_group_other(sb, dir, &group); | 839 | ret2 = find_group_other(sb, dir, &group, mode); |
739 | 840 | ||
740 | got_group: | 841 | got_group: |
842 | EXT4_I(dir)->i_last_alloc_group = group; | ||
741 | err = -ENOSPC; | 843 | err = -ENOSPC; |
742 | if (ret2 == -1) | 844 | if (ret2 == -1) |
743 | goto out; | 845 | goto out; |
@@ -894,6 +996,7 @@ got: | |||
894 | ei->i_file_acl = 0; | 996 | ei->i_file_acl = 0; |
895 | ei->i_dtime = 0; | 997 | ei->i_dtime = 0; |
896 | ei->i_block_group = group; | 998 | ei->i_block_group = group; |
999 | ei->i_last_alloc_group = ~0; | ||
897 | 1000 | ||
898 | ext4_set_inode_flags(inode); | 1001 | ext4_set_inode_flags(inode); |
899 | if (IS_DIRSYNC(inode)) | 1002 | if (IS_DIRSYNC(inode)) |