aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMingming Cao <cmm@us.ibm.com>2005-06-28 23:45:16 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-29 00:20:35 -0400
commit21fe3471c3aaa5c489c5d3a4d705291eb7511248 (patch)
treed1074604279a899f617d6653b42a31e01226f824
parentfb3cc4320e1fd87143683b540e459a2e20fdc9bb (diff)
[PATCH] ext3: reduce allocate-with-reservation lock latencies
Currently in ext3 block reservation code, the global filesystem reservation tree lock (rsv_block) is hold during the process of searching for a space to make a new reservation window, including while scaning the block bitmap to verify if the avalible window has a free block. Holding the lock during bitmap scan is unnecessary and could possibly cause scalability issue and latency issues. This patch tries to address this by dropping the lock before scan the bitmap. Before that we need to reserve the open window in case someone else is targetting at the same window. Question was should we reserve the whole free reservable space or just the window size we need. Reserve the whole free reservable space will possibly force other threads which intended to do block allocation nearby move to another block group(cause bad layout). In this patch, we just reserve the desired size before drop the lock and scan the block bitmap. This patch fixed a ext3 reservation latency issue seen on a cvs check out test. Patch is tested with many fsx, tiobench, dbench and untar a kernel test. Signed-Off-By: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/ext3/balloc.c135
-rw-r--r--fs/ext3/file.c4
2 files changed, 67 insertions, 72 deletions
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index ccd632fcc6d8..e463dca008e4 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -749,24 +749,24 @@ fail_access:
749 * to find a free region that is of my size and has not 749 * to find a free region that is of my size and has not
750 * been reserved. 750 * been reserved.
751 * 751 *
752 * on succeed, it returns the reservation window to be appended to.
753 * failed, return NULL.
754 */ 752 */
755static struct ext3_reserve_window_node *find_next_reservable_window( 753static int find_next_reservable_window(
756 struct ext3_reserve_window_node *search_head, 754 struct ext3_reserve_window_node *search_head,
757 unsigned long size, int *start_block, 755 struct ext3_reserve_window_node *my_rsv,
756 struct super_block * sb, int start_block,
758 int last_block) 757 int last_block)
759{ 758{
760 struct rb_node *next; 759 struct rb_node *next;
761 struct ext3_reserve_window_node *rsv, *prev; 760 struct ext3_reserve_window_node *rsv, *prev;
762 int cur; 761 int cur;
762 int size = my_rsv->rsv_goal_size;
763 763
764 /* TODO: make the start of the reservation window byte-aligned */ 764 /* TODO: make the start of the reservation window byte-aligned */
765 /* cur = *start_block & ~7;*/ 765 /* cur = *start_block & ~7;*/
766 cur = *start_block; 766 cur = start_block;
767 rsv = search_head; 767 rsv = search_head;
768 if (!rsv) 768 if (!rsv)
769 return NULL; 769 return -1;
770 770
771 while (1) { 771 while (1) {
772 if (cur <= rsv->rsv_end) 772 if (cur <= rsv->rsv_end)
@@ -782,11 +782,11 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
782 * space with expected-size (or more)... 782 * space with expected-size (or more)...
783 */ 783 */
784 if (cur > last_block) 784 if (cur > last_block)
785 return NULL; /* fail */ 785 return -1; /* fail */
786 786
787 prev = rsv; 787 prev = rsv;
788 next = rb_next(&rsv->rsv_node); 788 next = rb_next(&rsv->rsv_node);
789 rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node); 789 rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node);
790 790
791 /* 791 /*
792 * Reached the last reservation, we can just append to the 792 * Reached the last reservation, we can just append to the
@@ -813,8 +813,25 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
813 * return the reservation window that we could append to. 813 * return the reservation window that we could append to.
814 * succeed. 814 * succeed.
815 */ 815 */
816 *start_block = cur; 816
817 return prev; 817 if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
818 rsv_window_remove(sb, my_rsv);
819
820 /*
821 * Let's book the whole avaliable window for now. We will check the
822 * disk bitmap later and then, if there are free blocks then we adjust
823 * the window size if it's larger than requested.
824 * Otherwise, we will remove this node from the tree next time
825 * call find_next_reservable_window.
826 */
827 my_rsv->rsv_start = cur;
828 my_rsv->rsv_end = cur + size - 1;
829 my_rsv->rsv_alloc_hit = 0;
830
831 if (prev != my_rsv)
832 ext3_rsv_window_add(sb, my_rsv);
833
834 return 0;
818} 835}
819 836
820/** 837/**
@@ -852,6 +869,7 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
852 * @sb: the super block 869 * @sb: the super block
853 * @group: the group we are trying to allocate in 870 * @group: the group we are trying to allocate in
854 * @bitmap_bh: the block group block bitmap 871 * @bitmap_bh: the block group block bitmap
872 *
855 */ 873 */
856static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, 874static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
857 int goal, struct super_block *sb, 875 int goal, struct super_block *sb,
@@ -860,10 +878,10 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
860 struct ext3_reserve_window_node *search_head; 878 struct ext3_reserve_window_node *search_head;
861 int group_first_block, group_end_block, start_block; 879 int group_first_block, group_end_block, start_block;
862 int first_free_block; 880 int first_free_block;
863 int reservable_space_start;
864 struct ext3_reserve_window_node *prev_rsv;
865 struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; 881 struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
866 unsigned long size; 882 unsigned long size;
883 int ret;
884 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
867 885
868 group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + 886 group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
869 group * EXT3_BLOCKS_PER_GROUP(sb); 887 group * EXT3_BLOCKS_PER_GROUP(sb);
@@ -875,6 +893,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
875 start_block = goal + group_first_block; 893 start_block = goal + group_first_block;
876 894
877 size = my_rsv->rsv_goal_size; 895 size = my_rsv->rsv_goal_size;
896
878 if (!rsv_is_empty(&my_rsv->rsv_window)) { 897 if (!rsv_is_empty(&my_rsv->rsv_window)) {
879 /* 898 /*
880 * if the old reservation is cross group boundary 899 * if the old reservation is cross group boundary
@@ -908,6 +927,8 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
908 my_rsv->rsv_goal_size= size; 927 my_rsv->rsv_goal_size= size;
909 } 928 }
910 } 929 }
930
931 spin_lock(rsv_lock);
911 /* 932 /*
912 * shift the search start to the window near the goal block 933 * shift the search start to the window near the goal block
913 */ 934 */
@@ -921,11 +942,16 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
921 * need to check the bitmap after we found a reservable window. 942 * need to check the bitmap after we found a reservable window.
922 */ 943 */
923retry: 944retry:
924 prev_rsv = find_next_reservable_window(search_head, size, 945 ret = find_next_reservable_window(search_head, my_rsv, sb,
925 &start_block, group_end_block); 946 start_block, group_end_block);
926 if (prev_rsv == NULL) 947
927 goto failed; 948 if (ret == -1) {
928 reservable_space_start = start_block; 949 if (!rsv_is_empty(&my_rsv->rsv_window))
950 rsv_window_remove(sb, my_rsv);
951 spin_unlock(rsv_lock);
952 return -1;
953 }
954
929 /* 955 /*
930 * On success, find_next_reservable_window() returns the 956 * On success, find_next_reservable_window() returns the
931 * reservation window where there is a reservable space after it. 957 * reservation window where there is a reservable space after it.
@@ -937,8 +963,9 @@ retry:
937 * block. Search start from the start block of the reservable space 963 * block. Search start from the start block of the reservable space
938 * we just found. 964 * we just found.
939 */ 965 */
966 spin_unlock(rsv_lock);
940 first_free_block = bitmap_search_next_usable_block( 967 first_free_block = bitmap_search_next_usable_block(
941 reservable_space_start - group_first_block, 968 my_rsv->rsv_start - group_first_block,
942 bitmap_bh, group_end_block - group_first_block + 1); 969 bitmap_bh, group_end_block - group_first_block + 1);
943 970
944 if (first_free_block < 0) { 971 if (first_free_block < 0) {
@@ -946,54 +973,29 @@ retry:
946 * no free block left on the bitmap, no point 973 * no free block left on the bitmap, no point
947 * to reserve the space. return failed. 974 * to reserve the space. return failed.
948 */ 975 */
949 goto failed; 976 spin_lock(rsv_lock);
977 if (!rsv_is_empty(&my_rsv->rsv_window))
978 rsv_window_remove(sb, my_rsv);
979 spin_unlock(rsv_lock);
980 return -1; /* failed */
950 } 981 }
982
951 start_block = first_free_block + group_first_block; 983 start_block = first_free_block + group_first_block;
952 /* 984 /*
953 * check if the first free block is within the 985 * check if the first free block is within the
954 * free space we just found 986 * free space we just reserved
955 */ 987 */
956 if ((start_block >= reservable_space_start) && 988 if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end)
957 (start_block < reservable_space_start + size)) 989 return 0; /* success */
958 goto found_rsv_window;
959 /* 990 /*
960 * if the first free bit we found is out of the reservable space 991 * if the first free bit we found is out of the reservable space
961 * this means there is no free block on the reservable space 992 * continue search for next reservable space,
962 * we should continue search for next reservable space,
963 * start from where the free block is, 993 * start from where the free block is,
964 * we also shift the list head to where we stopped last time 994 * we also shift the list head to where we stopped last time
965 */ 995 */
966 search_head = prev_rsv; 996 search_head = my_rsv;
997 spin_lock(rsv_lock);
967 goto retry; 998 goto retry;
968
969found_rsv_window:
970 /*
971 * great! the reservable space contains some free blocks.
972 * if the search returns that we should add the new
973 * window just next to where the old window, we don't
974 * need to remove the old window first then add it to the
975 * same place, just update the new start and new end.
976 */
977 if (my_rsv != prev_rsv) {
978 if (!rsv_is_empty(&my_rsv->rsv_window))
979 rsv_window_remove(sb, my_rsv);
980 }
981 my_rsv->rsv_start = reservable_space_start;
982 my_rsv->rsv_end = my_rsv->rsv_start + size - 1;
983 my_rsv->rsv_alloc_hit = 0;
984 if (my_rsv != prev_rsv) {
985 ext3_rsv_window_add(sb, my_rsv);
986 }
987 return 0; /* succeed */
988failed:
989 /*
990 * failed to find a new reservation window in the current
991 * group, remove the current(stale) reservation window
992 * if there is any
993 */
994 if (!rsv_is_empty(&my_rsv->rsv_window))
995 rsv_window_remove(sb, my_rsv);
996 return -1; /* failed */
997} 999}
998 1000
999/* 1001/*
@@ -1023,7 +1025,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1023 int goal, struct ext3_reserve_window_node * my_rsv, 1025 int goal, struct ext3_reserve_window_node * my_rsv,
1024 int *errp) 1026 int *errp)
1025{ 1027{
1026 spinlock_t *rsv_lock;
1027 unsigned long group_first_block; 1028 unsigned long group_first_block;
1028 int ret = 0; 1029 int ret = 0;
1029 int fatal; 1030 int fatal;
@@ -1052,7 +1053,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1052 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL); 1053 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL);
1053 goto out; 1054 goto out;
1054 } 1055 }
1055 rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
1056 /* 1056 /*
1057 * goal is a group relative block number (if there is a goal) 1057 * goal is a group relative block number (if there is a goal)
1058 * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb) 1058 * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb)
@@ -1078,30 +1078,21 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1078 * then we could go to allocate from the reservation window directly. 1078 * then we could go to allocate from the reservation window directly.
1079 */ 1079 */
1080 while (1) { 1080 while (1) {
1081 struct ext3_reserve_window rsv_copy; 1081 if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
1082 1082 !goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) {
1083 rsv_copy._rsv_start = my_rsv->rsv_start;
1084 rsv_copy._rsv_end = my_rsv->rsv_end;
1085
1086 if (rsv_is_empty(&rsv_copy) || (ret < 0) ||
1087 !goal_in_my_reservation(&rsv_copy, goal, group, sb)) {
1088 spin_lock(rsv_lock);
1089 ret = alloc_new_reservation(my_rsv, goal, sb, 1083 ret = alloc_new_reservation(my_rsv, goal, sb,
1090 group, bitmap_bh); 1084 group, bitmap_bh);
1091 rsv_copy._rsv_start = my_rsv->rsv_start;
1092 rsv_copy._rsv_end = my_rsv->rsv_end;
1093 spin_unlock(rsv_lock);
1094 if (ret < 0) 1085 if (ret < 0)
1095 break; /* failed */ 1086 break; /* failed */
1096 1087
1097 if (!goal_in_my_reservation(&rsv_copy, goal, group, sb)) 1088 if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb))
1098 goal = -1; 1089 goal = -1;
1099 } 1090 }
1100 if ((rsv_copy._rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) 1091 if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
1101 || (rsv_copy._rsv_end < group_first_block)) 1092 || (my_rsv->rsv_end < group_first_block))
1102 BUG(); 1093 BUG();
1103 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, 1094 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal,
1104 &rsv_copy); 1095 &my_rsv->rsv_window);
1105 if (ret >= 0) { 1096 if (ret >= 0) {
1106 my_rsv->rsv_alloc_hit++; 1097 my_rsv->rsv_alloc_hit++;
1107 break; /* succeed */ 1098 break; /* succeed */
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 5ad8cf0292df..98e78345ead9 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -36,7 +36,11 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
36 /* if we are the last writer on the inode, drop the block reservation */ 36 /* if we are the last writer on the inode, drop the block reservation */
37 if ((filp->f_mode & FMODE_WRITE) && 37 if ((filp->f_mode & FMODE_WRITE) &&
38 (atomic_read(&inode->i_writecount) == 1)) 38 (atomic_read(&inode->i_writecount) == 1))
39 {
40 down(&EXT3_I(inode)->truncate_sem);
39 ext3_discard_reservation(inode); 41 ext3_discard_reservation(inode);
42 up(&EXT3_I(inode)->truncate_sem);
43 }
40 if (is_dx(inode) && filp->private_data) 44 if (is_dx(inode) && filp->private_data)
41 ext3_htree_free_dir_info(filp->private_data); 45 ext3_htree_free_dir_info(filp->private_data);
42 46