diff options
author | Tao Ma <tao.ma@oracle.com> | 2009-02-24 11:53:25 -0500 |
---|---|---|
committer | Mark Fasheh <mfasheh@suse.com> | 2009-04-03 14:39:18 -0400 |
commit | feb473a6e8bd19297d0f3bb377b25055c0228c0a (patch) | |
tree | 4ceae3a185fc430c8927dc0bbb197a3694c37194 /fs/ocfs2 | |
parent | 60ca81e82dae4aa2e8ae84cf96b4d08535931669 (diff) |
ocfs2: Optimize inode group allocation by recording last used group.
In ocfs2, the block group search looks for the "emptiest" group
to allocate from. So if the allocator has many equally (or almost
equally) empty groups, new block groups will tend to get spread
out amongst them.
So we add osb_inode_alloc_group in ocfs2_super to record the last
used inode allocation group.
For more details, please see
http://oss.oracle.com/osswiki/OCFS2/DesignDocs/InodeAllocationStrategy.
I have done some basic testing, and the results show a tenfold improvement
on some cold-cache stat workloads.
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/ocfs2.h | 3 | ||||
-rw-r--r-- | fs/ocfs2/suballoc.c | 32 |
2 files changed, 31 insertions, 4 deletions
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index e1844d5736c..b65d19c9756 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -343,6 +343,9 @@ struct ocfs2_super | |||
343 | 343 | ||
344 | unsigned int osb_dx_mask; | 344 | unsigned int osb_dx_mask; |
345 | u32 osb_dx_seed[4]; | 345 | u32 osb_dx_seed[4]; |
346 | |||
347 | /* the group we used to allocate inodes. */ | ||
348 | u64 osb_inode_alloc_group; | ||
346 | }; | 349 | }; |
347 | 350 | ||
348 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) | 351 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index b7a065e87cb..4c1399cc03f 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -66,6 +66,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
66 | struct inode *alloc_inode, | 66 | struct inode *alloc_inode, |
67 | struct buffer_head *bh, | 67 | struct buffer_head *bh, |
68 | u64 max_block, | 68 | u64 max_block, |
69 | u64 *last_alloc_group, | ||
69 | int flags); | 70 | int flags); |
70 | 71 | ||
71 | static int ocfs2_cluster_group_search(struct inode *inode, | 72 | static int ocfs2_cluster_group_search(struct inode *inode, |
@@ -407,6 +408,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
407 | struct inode *alloc_inode, | 408 | struct inode *alloc_inode, |
408 | struct buffer_head *bh, | 409 | struct buffer_head *bh, |
409 | u64 max_block, | 410 | u64 max_block, |
411 | u64 *last_alloc_group, | ||
410 | int flags) | 412 | int flags) |
411 | { | 413 | { |
412 | int status, credits; | 414 | int status, credits; |
@@ -444,6 +446,11 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
444 | goto bail; | 446 | goto bail; |
445 | } | 447 | } |
446 | 448 | ||
449 | if (last_alloc_group && *last_alloc_group != 0) { | ||
450 | mlog(0, "use old allocation group %llu for block group alloc\n", | ||
451 | (unsigned long long)*last_alloc_group); | ||
452 | ac->ac_last_group = *last_alloc_group; | ||
453 | } | ||
447 | status = ocfs2_claim_clusters(osb, | 454 | status = ocfs2_claim_clusters(osb, |
448 | handle, | 455 | handle, |
449 | ac, | 456 | ac, |
@@ -518,6 +525,11 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
518 | alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); | 525 | alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); |
519 | 526 | ||
520 | status = 0; | 527 | status = 0; |
528 | |||
529 | /* save the new last alloc group so that the caller can cache it. */ | ||
530 | if (last_alloc_group) | ||
531 | *last_alloc_group = ac->ac_last_group; | ||
532 | |||
521 | bail: | 533 | bail: |
522 | if (handle) | 534 | if (handle) |
523 | ocfs2_commit_trans(osb, handle); | 535 | ocfs2_commit_trans(osb, handle); |
@@ -535,6 +547,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
535 | struct ocfs2_alloc_context *ac, | 547 | struct ocfs2_alloc_context *ac, |
536 | int type, | 548 | int type, |
537 | u32 slot, | 549 | u32 slot, |
550 | u64 *last_alloc_group, | ||
538 | int flags) | 551 | int flags) |
539 | { | 552 | { |
540 | int status; | 553 | int status; |
@@ -600,7 +613,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
600 | } | 613 | } |
601 | 614 | ||
602 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh, | 615 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh, |
603 | ac->ac_max_block, flags); | 616 | ac->ac_max_block, |
617 | last_alloc_group, flags); | ||
604 | if (status < 0) { | 618 | if (status < 0) { |
605 | if (status != -ENOSPC) | 619 | if (status != -ENOSPC) |
606 | mlog_errno(status); | 620 | mlog_errno(status); |
@@ -644,7 +658,7 @@ int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, | |||
644 | 658 | ||
645 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), | 659 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), |
646 | EXTENT_ALLOC_SYSTEM_INODE, | 660 | EXTENT_ALLOC_SYSTEM_INODE, |
647 | slot, ALLOC_NEW_GROUP); | 661 | slot, NULL, ALLOC_NEW_GROUP); |
648 | if (status < 0) { | 662 | if (status < 0) { |
649 | if (status != -ENOSPC) | 663 | if (status != -ENOSPC) |
650 | mlog_errno(status); | 664 | mlog_errno(status); |
@@ -690,7 +704,8 @@ static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, | |||
690 | 704 | ||
691 | status = ocfs2_reserve_suballoc_bits(osb, ac, | 705 | status = ocfs2_reserve_suballoc_bits(osb, ac, |
692 | INODE_ALLOC_SYSTEM_INODE, | 706 | INODE_ALLOC_SYSTEM_INODE, |
693 | slot, NOT_ALLOC_NEW_GROUP); | 707 | slot, NULL, |
708 | NOT_ALLOC_NEW_GROUP); | ||
694 | if (status >= 0) { | 709 | if (status >= 0) { |
695 | ocfs2_set_inode_steal_slot(osb, slot); | 710 | ocfs2_set_inode_steal_slot(osb, slot); |
696 | break; | 711 | break; |
@@ -707,6 +722,7 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
707 | { | 722 | { |
708 | int status; | 723 | int status; |
709 | s16 slot = ocfs2_get_inode_steal_slot(osb); | 724 | s16 slot = ocfs2_get_inode_steal_slot(osb); |
725 | u64 alloc_group; | ||
710 | 726 | ||
711 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); | 727 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); |
712 | if (!(*ac)) { | 728 | if (!(*ac)) { |
@@ -742,14 +758,22 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
742 | goto inode_steal; | 758 | goto inode_steal; |
743 | 759 | ||
744 | atomic_set(&osb->s_num_inodes_stolen, 0); | 760 | atomic_set(&osb->s_num_inodes_stolen, 0); |
761 | alloc_group = osb->osb_inode_alloc_group; | ||
745 | status = ocfs2_reserve_suballoc_bits(osb, *ac, | 762 | status = ocfs2_reserve_suballoc_bits(osb, *ac, |
746 | INODE_ALLOC_SYSTEM_INODE, | 763 | INODE_ALLOC_SYSTEM_INODE, |
747 | osb->slot_num, | 764 | osb->slot_num, |
765 | &alloc_group, | ||
748 | ALLOC_NEW_GROUP | | 766 | ALLOC_NEW_GROUP | |
749 | ALLOC_GROUPS_FROM_GLOBAL); | 767 | ALLOC_GROUPS_FROM_GLOBAL); |
750 | if (status >= 0) { | 768 | if (status >= 0) { |
751 | status = 0; | 769 | status = 0; |
752 | 770 | ||
771 | spin_lock(&osb->osb_lock); | ||
772 | osb->osb_inode_alloc_group = alloc_group; | ||
773 | spin_unlock(&osb->osb_lock); | ||
774 | mlog(0, "after reservation, new allocation group is " | ||
775 | "%llu\n", (unsigned long long)alloc_group); | ||
776 | |||
753 | /* | 777 | /* |
754 | * Some inodes must be freed by us, so try to allocate | 778 | * Some inodes must be freed by us, so try to allocate |
755 | * from our own next time. | 779 | * from our own next time. |
@@ -796,7 +820,7 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | |||
796 | 820 | ||
797 | status = ocfs2_reserve_suballoc_bits(osb, ac, | 821 | status = ocfs2_reserve_suballoc_bits(osb, ac, |
798 | GLOBAL_BITMAP_SYSTEM_INODE, | 822 | GLOBAL_BITMAP_SYSTEM_INODE, |
799 | OCFS2_INVALID_SLOT, | 823 | OCFS2_INVALID_SLOT, NULL, |
800 | ALLOC_NEW_GROUP); | 824 | ALLOC_NEW_GROUP); |
801 | if (status < 0 && status != -ENOSPC) { | 825 | if (status < 0 && status != -ENOSPC) { |
802 | mlog_errno(status); | 826 | mlog_errno(status); |