diff options
author | Mingming Cao <cmm@us.ibm.com> | 2005-06-28 23:45:16 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-29 00:20:35 -0400 |
commit | 21fe3471c3aaa5c489c5d3a4d705291eb7511248 (patch) | |
tree | d1074604279a899f617d6653b42a31e01226f824 | |
parent | fb3cc4320e1fd87143683b540e459a2e20fdc9bb (diff) |
[PATCH] ext3: reduce allocate-with-reservation lock latencies
Currently in ext3 block reservation code, the global filesystem reservation
tree lock (rsv_lock) is held during the process of searching for a space
to make a new reservation window, including while scanning the block bitmap
to verify if the available window has a free block. Holding the lock during
the bitmap scan is unnecessary and could possibly cause scalability and
latency issues.
This patch tries to address this by dropping the lock before scanning the
bitmap. Before that we need to reserve the open window in case someone
else is targeting the same window. The question was whether we should reserve
the whole free reservable space or just the window size we need. Reserving the
whole free reservable space would possibly force other threads which
intended to do block allocation nearby to move to another block group (causing
bad layout). In this patch, we just reserve the desired size before dropping
the lock and scanning the block bitmap. This patch fixed an ext3 reservation
latency issue seen on a cvs checkout test. The patch was tested with many fsx,
tiobench, dbench and kernel-untar tests.
Signed-Off-By: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/ext3/balloc.c | 135 | ||||
-rw-r--r-- | fs/ext3/file.c | 4 |
2 files changed, 67 insertions, 72 deletions
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index ccd632fcc6d..e463dca008e 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
@@ -749,24 +749,24 @@ fail_access: | |||
749 | * to find a free region that is of my size and has not | 749 | * to find a free region that is of my size and has not |
750 | * been reserved. | 750 | * been reserved. |
751 | * | 751 | * |
752 | * on succeed, it returns the reservation window to be appended to. | ||
753 | * failed, return NULL. | ||
754 | */ | 752 | */ |
755 | static struct ext3_reserve_window_node *find_next_reservable_window( | 753 | static int find_next_reservable_window( |
756 | struct ext3_reserve_window_node *search_head, | 754 | struct ext3_reserve_window_node *search_head, |
757 | unsigned long size, int *start_block, | 755 | struct ext3_reserve_window_node *my_rsv, |
756 | struct super_block * sb, int start_block, | ||
758 | int last_block) | 757 | int last_block) |
759 | { | 758 | { |
760 | struct rb_node *next; | 759 | struct rb_node *next; |
761 | struct ext3_reserve_window_node *rsv, *prev; | 760 | struct ext3_reserve_window_node *rsv, *prev; |
762 | int cur; | 761 | int cur; |
762 | int size = my_rsv->rsv_goal_size; | ||
763 | 763 | ||
764 | /* TODO: make the start of the reservation window byte-aligned */ | 764 | /* TODO: make the start of the reservation window byte-aligned */ |
765 | /* cur = *start_block & ~7;*/ | 765 | /* cur = *start_block & ~7;*/ |
766 | cur = *start_block; | 766 | cur = start_block; |
767 | rsv = search_head; | 767 | rsv = search_head; |
768 | if (!rsv) | 768 | if (!rsv) |
769 | return NULL; | 769 | return -1; |
770 | 770 | ||
771 | while (1) { | 771 | while (1) { |
772 | if (cur <= rsv->rsv_end) | 772 | if (cur <= rsv->rsv_end) |
@@ -782,11 +782,11 @@ static struct ext3_reserve_window_node *find_next_reservable_window( | |||
782 | * space with expected-size (or more)... | 782 | * space with expected-size (or more)... |
783 | */ | 783 | */ |
784 | if (cur > last_block) | 784 | if (cur > last_block) |
785 | return NULL; /* fail */ | 785 | return -1; /* fail */ |
786 | 786 | ||
787 | prev = rsv; | 787 | prev = rsv; |
788 | next = rb_next(&rsv->rsv_node); | 788 | next = rb_next(&rsv->rsv_node); |
789 | rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node); | 789 | rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node); |
790 | 790 | ||
791 | /* | 791 | /* |
792 | * Reached the last reservation, we can just append to the | 792 | * Reached the last reservation, we can just append to the |
@@ -813,8 +813,25 @@ static struct ext3_reserve_window_node *find_next_reservable_window( | |||
813 | * return the reservation window that we could append to. | 813 | * return the reservation window that we could append to. |
814 | * succeed. | 814 | * succeed. |
815 | */ | 815 | */ |
816 | *start_block = cur; | 816 | |
817 | return prev; | 817 | if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) |
818 | rsv_window_remove(sb, my_rsv); | ||
819 | |||
820 | /* | ||
821 | * Let's book the whole avaliable window for now. We will check the | ||
822 | * disk bitmap later and then, if there are free blocks then we adjust | ||
823 | * the window size if it's larger than requested. | ||
824 | * Otherwise, we will remove this node from the tree next time | ||
825 | * call find_next_reservable_window. | ||
826 | */ | ||
827 | my_rsv->rsv_start = cur; | ||
828 | my_rsv->rsv_end = cur + size - 1; | ||
829 | my_rsv->rsv_alloc_hit = 0; | ||
830 | |||
831 | if (prev != my_rsv) | ||
832 | ext3_rsv_window_add(sb, my_rsv); | ||
833 | |||
834 | return 0; | ||
818 | } | 835 | } |
819 | 836 | ||
820 | /** | 837 | /** |
@@ -852,6 +869,7 @@ static struct ext3_reserve_window_node *find_next_reservable_window( | |||
852 | * @sb: the super block | 869 | * @sb: the super block |
853 | * @group: the group we are trying to allocate in | 870 | * @group: the group we are trying to allocate in |
854 | * @bitmap_bh: the block group block bitmap | 871 | * @bitmap_bh: the block group block bitmap |
872 | * | ||
855 | */ | 873 | */ |
856 | static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, | 874 | static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, |
857 | int goal, struct super_block *sb, | 875 | int goal, struct super_block *sb, |
@@ -860,10 +878,10 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, | |||
860 | struct ext3_reserve_window_node *search_head; | 878 | struct ext3_reserve_window_node *search_head; |
861 | int group_first_block, group_end_block, start_block; | 879 | int group_first_block, group_end_block, start_block; |
862 | int first_free_block; | 880 | int first_free_block; |
863 | int reservable_space_start; | ||
864 | struct ext3_reserve_window_node *prev_rsv; | ||
865 | struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; | 881 | struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; |
866 | unsigned long size; | 882 | unsigned long size; |
883 | int ret; | ||
884 | spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; | ||
867 | 885 | ||
868 | group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + | 886 | group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + |
869 | group * EXT3_BLOCKS_PER_GROUP(sb); | 887 | group * EXT3_BLOCKS_PER_GROUP(sb); |
@@ -875,6 +893,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, | |||
875 | start_block = goal + group_first_block; | 893 | start_block = goal + group_first_block; |
876 | 894 | ||
877 | size = my_rsv->rsv_goal_size; | 895 | size = my_rsv->rsv_goal_size; |
896 | |||
878 | if (!rsv_is_empty(&my_rsv->rsv_window)) { | 897 | if (!rsv_is_empty(&my_rsv->rsv_window)) { |
879 | /* | 898 | /* |
880 | * if the old reservation is cross group boundary | 899 | * if the old reservation is cross group boundary |
@@ -908,6 +927,8 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, | |||
908 | my_rsv->rsv_goal_size= size; | 927 | my_rsv->rsv_goal_size= size; |
909 | } | 928 | } |
910 | } | 929 | } |
930 | |||
931 | spin_lock(rsv_lock); | ||
911 | /* | 932 | /* |
912 | * shift the search start to the window near the goal block | 933 | * shift the search start to the window near the goal block |
913 | */ | 934 | */ |
@@ -921,11 +942,16 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, | |||
921 | * need to check the bitmap after we found a reservable window. | 942 | * need to check the bitmap after we found a reservable window. |
922 | */ | 943 | */ |
923 | retry: | 944 | retry: |
924 | prev_rsv = find_next_reservable_window(search_head, size, | 945 | ret = find_next_reservable_window(search_head, my_rsv, sb, |
925 | &start_block, group_end_block); | 946 | start_block, group_end_block); |
926 | if (prev_rsv == NULL) | 947 | |
927 | goto failed; | 948 | if (ret == -1) { |
928 | reservable_space_start = start_block; | 949 | if (!rsv_is_empty(&my_rsv->rsv_window)) |
950 | rsv_window_remove(sb, my_rsv); | ||
951 | spin_unlock(rsv_lock); | ||
952 | return -1; | ||
953 | } | ||
954 | |||
929 | /* | 955 | /* |
930 | * On success, find_next_reservable_window() returns the | 956 | * On success, find_next_reservable_window() returns the |
931 | * reservation window where there is a reservable space after it. | 957 | * reservation window where there is a reservable space after it. |
@@ -937,8 +963,9 @@ retry: | |||
937 | * block. Search start from the start block of the reservable space | 963 | * block. Search start from the start block of the reservable space |
938 | * we just found. | 964 | * we just found. |
939 | */ | 965 | */ |
966 | spin_unlock(rsv_lock); | ||
940 | first_free_block = bitmap_search_next_usable_block( | 967 | first_free_block = bitmap_search_next_usable_block( |
941 | reservable_space_start - group_first_block, | 968 | my_rsv->rsv_start - group_first_block, |
942 | bitmap_bh, group_end_block - group_first_block + 1); | 969 | bitmap_bh, group_end_block - group_first_block + 1); |
943 | 970 | ||
944 | if (first_free_block < 0) { | 971 | if (first_free_block < 0) { |
@@ -946,54 +973,29 @@ retry: | |||
946 | * no free block left on the bitmap, no point | 973 | * no free block left on the bitmap, no point |
947 | * to reserve the space. return failed. | 974 | * to reserve the space. return failed. |
948 | */ | 975 | */ |
949 | goto failed; | 976 | spin_lock(rsv_lock); |
977 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
978 | rsv_window_remove(sb, my_rsv); | ||
979 | spin_unlock(rsv_lock); | ||
980 | return -1; /* failed */ | ||
950 | } | 981 | } |
982 | |||
951 | start_block = first_free_block + group_first_block; | 983 | start_block = first_free_block + group_first_block; |
952 | /* | 984 | /* |
953 | * check if the first free block is within the | 985 | * check if the first free block is within the |
954 | * free space we just found | 986 | * free space we just reserved |
955 | */ | 987 | */ |
956 | if ((start_block >= reservable_space_start) && | 988 | if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end) |
957 | (start_block < reservable_space_start + size)) | 989 | return 0; /* success */ |
958 | goto found_rsv_window; | ||
959 | /* | 990 | /* |
960 | * if the first free bit we found is out of the reservable space | 991 | * if the first free bit we found is out of the reservable space |
961 | * this means there is no free block on the reservable space | 992 | * continue search for next reservable space, |
962 | * we should continue search for next reservable space, | ||
963 | * start from where the free block is, | 993 | * start from where the free block is, |
964 | * we also shift the list head to where we stopped last time | 994 | * we also shift the list head to where we stopped last time |
965 | */ | 995 | */ |
966 | search_head = prev_rsv; | 996 | search_head = my_rsv; |
997 | spin_lock(rsv_lock); | ||
967 | goto retry; | 998 | goto retry; |
968 | |||
969 | found_rsv_window: | ||
970 | /* | ||
971 | * great! the reservable space contains some free blocks. | ||
972 | * if the search returns that we should add the new | ||
973 | * window just next to where the old window, we don't | ||
974 | * need to remove the old window first then add it to the | ||
975 | * same place, just update the new start and new end. | ||
976 | */ | ||
977 | if (my_rsv != prev_rsv) { | ||
978 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
979 | rsv_window_remove(sb, my_rsv); | ||
980 | } | ||
981 | my_rsv->rsv_start = reservable_space_start; | ||
982 | my_rsv->rsv_end = my_rsv->rsv_start + size - 1; | ||
983 | my_rsv->rsv_alloc_hit = 0; | ||
984 | if (my_rsv != prev_rsv) { | ||
985 | ext3_rsv_window_add(sb, my_rsv); | ||
986 | } | ||
987 | return 0; /* succeed */ | ||
988 | failed: | ||
989 | /* | ||
990 | * failed to find a new reservation window in the current | ||
991 | * group, remove the current(stale) reservation window | ||
992 | * if there is any | ||
993 | */ | ||
994 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
995 | rsv_window_remove(sb, my_rsv); | ||
996 | return -1; /* failed */ | ||
997 | } | 999 | } |
998 | 1000 | ||
999 | /* | 1001 | /* |
@@ -1023,7 +1025,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | |||
1023 | int goal, struct ext3_reserve_window_node * my_rsv, | 1025 | int goal, struct ext3_reserve_window_node * my_rsv, |
1024 | int *errp) | 1026 | int *errp) |
1025 | { | 1027 | { |
1026 | spinlock_t *rsv_lock; | ||
1027 | unsigned long group_first_block; | 1028 | unsigned long group_first_block; |
1028 | int ret = 0; | 1029 | int ret = 0; |
1029 | int fatal; | 1030 | int fatal; |
@@ -1052,7 +1053,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | |||
1052 | ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL); | 1053 | ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL); |
1053 | goto out; | 1054 | goto out; |
1054 | } | 1055 | } |
1055 | rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; | ||
1056 | /* | 1056 | /* |
1057 | * goal is a group relative block number (if there is a goal) | 1057 | * goal is a group relative block number (if there is a goal) |
1058 | * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb) | 1058 | * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb) |
@@ -1078,30 +1078,21 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | |||
1078 | * then we could go to allocate from the reservation window directly. | 1078 | * then we could go to allocate from the reservation window directly. |
1079 | */ | 1079 | */ |
1080 | while (1) { | 1080 | while (1) { |
1081 | struct ext3_reserve_window rsv_copy; | 1081 | if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || |
1082 | 1082 | !goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) { | |
1083 | rsv_copy._rsv_start = my_rsv->rsv_start; | ||
1084 | rsv_copy._rsv_end = my_rsv->rsv_end; | ||
1085 | |||
1086 | if (rsv_is_empty(&rsv_copy) || (ret < 0) || | ||
1087 | !goal_in_my_reservation(&rsv_copy, goal, group, sb)) { | ||
1088 | spin_lock(rsv_lock); | ||
1089 | ret = alloc_new_reservation(my_rsv, goal, sb, | 1083 | ret = alloc_new_reservation(my_rsv, goal, sb, |
1090 | group, bitmap_bh); | 1084 | group, bitmap_bh); |
1091 | rsv_copy._rsv_start = my_rsv->rsv_start; | ||
1092 | rsv_copy._rsv_end = my_rsv->rsv_end; | ||
1093 | spin_unlock(rsv_lock); | ||
1094 | if (ret < 0) | 1085 | if (ret < 0) |
1095 | break; /* failed */ | 1086 | break; /* failed */ |
1096 | 1087 | ||
1097 | if (!goal_in_my_reservation(&rsv_copy, goal, group, sb)) | 1088 | if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) |
1098 | goal = -1; | 1089 | goal = -1; |
1099 | } | 1090 | } |
1100 | if ((rsv_copy._rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) | 1091 | if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) |
1101 | || (rsv_copy._rsv_end < group_first_block)) | 1092 | || (my_rsv->rsv_end < group_first_block)) |
1102 | BUG(); | 1093 | BUG(); |
1103 | ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, | 1094 | ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, |
1104 | &rsv_copy); | 1095 | &my_rsv->rsv_window); |
1105 | if (ret >= 0) { | 1096 | if (ret >= 0) { |
1106 | my_rsv->rsv_alloc_hit++; | 1097 | my_rsv->rsv_alloc_hit++; |
1107 | break; /* succeed */ | 1098 | break; /* succeed */ |
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 5ad8cf0292d..98e78345ead 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
@@ -36,7 +36,11 @@ static int ext3_release_file (struct inode * inode, struct file * filp) | |||
36 | /* if we are the last writer on the inode, drop the block reservation */ | 36 | /* if we are the last writer on the inode, drop the block reservation */ |
37 | if ((filp->f_mode & FMODE_WRITE) && | 37 | if ((filp->f_mode & FMODE_WRITE) && |
38 | (atomic_read(&inode->i_writecount) == 1)) | 38 | (atomic_read(&inode->i_writecount) == 1)) |
39 | { | ||
40 | down(&EXT3_I(inode)->truncate_sem); | ||
39 | ext3_discard_reservation(inode); | 41 | ext3_discard_reservation(inode); |
42 | up(&EXT3_I(inode)->truncate_sem); | ||
43 | } | ||
40 | if (is_dx(inode) && filp->private_data) | 44 | if (is_dx(inode) && filp->private_data) |
41 | ext3_htree_free_dir_info(filp->private_data); | 45 | ext3_htree_free_dir_info(filp->private_data); |
42 | 46 | ||