about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
author: Amir Goldstein <amir73il@users.sf.net> 2011-05-09 21:48:13 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2011-05-09 21:48:13 -0400
commit: 2de8807b25de6d24476923121e3b20146fe8216b (patch)
tree: 083909f3bad86d16a1a65511771945b2ccbb917f /fs/ext4/mballoc.c
parent: e73a347b7723757bb5fb5c502814dc205a7f496d (diff)
ext4: synchronize ext4_mb_init_group() with buddy page lock
The old routines ext4_mb_[get|put]_buddy_cache_lock(), which used to take grp->alloc_sem for all groups on the buddy page, have been replaced with the routines ext4_mb_[get|put]_buddy_page_lock(). The new routines take both buddy and bitmap page locks to protect against concurrent init of groups on the same buddy page. The GROUP_NEED_INIT flag is tested again under page lock to check if the group was initialized by another caller.

Signed-off-by: Amir Goldstein <amir73il@users.sf.net>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- fs/ext4/mballoc.c 175
1 files changed, 62 insertions, 113 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 65329f148da..7311f25a88e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -957,22 +957,21 @@ out:
957} 957}
958 958
959/* 959/*
960 * lock the group_info alloc_sem of all the groups 960 * Lock the buddy and bitmap pages. This makes sure other parallel init_group
961 * belonging to the same buddy cache page. This 961 * on the same buddy page doesn't happen while holding the buddy page lock.
962 * makes sure other parallel operation on the buddy 962 * Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap
963 * cache doesn't happen while holding the buddy cache 963 * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
964 * lock
965 */ 964 */
966static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, 965static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
967 ext4_group_t group) 966 ext4_group_t group, struct ext4_buddy *e4b)
968{ 967{
969 int i; 968 struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
970 int block, pnum; 969 int block, pnum, poff;
971 int blocks_per_page; 970 int blocks_per_page;
972 int groups_per_page; 971 struct page *page;
973 ext4_group_t ngroups = ext4_get_groups_count(sb); 972
974 ext4_group_t first_group; 973 e4b->bd_buddy_page = NULL;
975 struct ext4_group_info *grp; 974 e4b->bd_bitmap_page = NULL;
976 975
977 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; 976 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
978 /* 977 /*
@@ -982,57 +981,40 @@ static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
982 */ 981 */
983 block = group * 2; 982 block = group * 2;
984 pnum = block / blocks_per_page; 983 pnum = block / blocks_per_page;
985 first_group = pnum * blocks_per_page / 2; 984 poff = block % blocks_per_page;
986 985 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
987 groups_per_page = blocks_per_page >> 1; 986 if (!page)
988 if (groups_per_page == 0) 987 return -EIO;
989 groups_per_page = 1; 988 BUG_ON(page->mapping != inode->i_mapping);
990 /* read all groups the page covers into the cache */ 989 e4b->bd_bitmap_page = page;
991 for (i = 0; i < groups_per_page; i++) { 990 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
992 991
993 if ((first_group + i) >= ngroups) 992 if (blocks_per_page >= 2) {
994 break; 993 /* buddy and bitmap are on the same page */
995 grp = ext4_get_group_info(sb, first_group + i); 994 return 0;
996 /* take all groups write allocation
997 * semaphore. This make sure there is
998 * no block allocation going on in any
999 * of that groups
1000 */
1001 down_write_nested(&grp->alloc_sem, i);
1002 } 995 }
1003 return i; 996
997 block++;
998 pnum = block / blocks_per_page;
999 poff = block % blocks_per_page;
1000 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1001 if (!page)
1002 return -EIO;
1003 BUG_ON(page->mapping != inode->i_mapping);
1004 e4b->bd_buddy_page = page;
1005 return 0;
1004} 1006}
1005 1007
1006static void ext4_mb_put_buddy_cache_lock(struct super_block *sb, 1008static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
1007 ext4_group_t group, int locked_group)
1008{ 1009{
1009 int i; 1010 if (e4b->bd_bitmap_page) {
1010 int block, pnum; 1011 unlock_page(e4b->bd_bitmap_page);
1011 int blocks_per_page; 1012 page_cache_release(e4b->bd_bitmap_page);
1012 ext4_group_t first_group; 1013 }
1013 struct ext4_group_info *grp; 1014 if (e4b->bd_buddy_page) {
1014 1015 unlock_page(e4b->bd_buddy_page);
1015 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; 1016 page_cache_release(e4b->bd_buddy_page);
1016 /*
1017 * the buddy cache inode stores the block bitmap
1018 * and buddy information in consecutive blocks.
1019 * So for each group we need two blocks.
1020 */
1021 block = group * 2;
1022 pnum = block / blocks_per_page;
1023 first_group = pnum * blocks_per_page / 2;
1024 /* release locks on all the groups */
1025 for (i = 0; i < locked_group; i++) {
1026
1027 grp = ext4_get_group_info(sb, first_group + i);
1028 /* take all groups write allocation
1029 * semaphore. This make sure there is
1030 * no block allocation going on in any
1031 * of that groups
1032 */
1033 up_write(&grp->alloc_sem);
1034 } 1017 }
1035
1036} 1018}
1037 1019
1038/* 1020/*
@@ -1044,93 +1026,60 @@ static noinline_for_stack
1044int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) 1026int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
1045{ 1027{
1046 1028
1047 int ret = 0;
1048 void *bitmap;
1049 int blocks_per_page;
1050 int block, pnum, poff;
1051 int num_grp_locked = 0;
1052 struct ext4_group_info *this_grp; 1029 struct ext4_group_info *this_grp;
1053 struct ext4_sb_info *sbi = EXT4_SB(sb); 1030 struct ext4_buddy e4b;
1054 struct inode *inode = sbi->s_buddy_cache; 1031 struct page *page;
1055 struct page *page = NULL, *bitmap_page = NULL; 1032 int ret = 0;
1056 1033
1057 mb_debug(1, "init group %u\n", group); 1034 mb_debug(1, "init group %u\n", group);
1058 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1059 this_grp = ext4_get_group_info(sb, group); 1035 this_grp = ext4_get_group_info(sb, group);
1060 /* 1036 /*
1061 * This ensures that we don't reinit the buddy cache 1037 * This ensures that we don't reinit the buddy cache
1062 * page which map to the group from which we are already 1038 * page which map to the group from which we are already
1063 * allocating. If we are looking at the buddy cache we would 1039 * allocating. If we are looking at the buddy cache we would
1064 * have taken a reference using ext4_mb_load_buddy and that 1040 * have taken a reference using ext4_mb_load_buddy and that
1065 * would have taken the alloc_sem lock. 1041 * would have pinned buddy page to page cache.
1066 */ 1042 */
1067 num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); 1043 ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
1068 if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { 1044 if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
1069 /* 1045 /*
1070 * somebody initialized the group 1046 * somebody initialized the group
1071 * return without doing anything 1047 * return without doing anything
1072 */ 1048 */
1073 ret = 0;
1074 goto err; 1049 goto err;
1075 } 1050 }
1076 /* 1051
1077 * the buddy cache inode stores the block bitmap 1052 page = e4b.bd_bitmap_page;
1078 * and buddy information in consecutive blocks. 1053 ret = ext4_mb_init_cache(page, NULL);
1079 * So for each group we need two blocks. 1054 if (ret)
1080 */ 1055 goto err;
1081 block = group * 2; 1056 if (!PageUptodate(page)) {
1082 pnum = block / blocks_per_page;
1083 poff = block % blocks_per_page;
1084 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1085 if (page) {
1086 BUG_ON(page->mapping != inode->i_mapping);
1087 ret = ext4_mb_init_cache(page, NULL);
1088 if (ret) {
1089 unlock_page(page);
1090 goto err;
1091 }
1092 unlock_page(page);
1093 }
1094 if (page == NULL || !PageUptodate(page)) {
1095 ret = -EIO; 1057 ret = -EIO;
1096 goto err; 1058 goto err;
1097 } 1059 }
1098 mark_page_accessed(page); 1060 mark_page_accessed(page);
1099 bitmap_page = page;
1100 bitmap = page_address(page) + (poff * sb->s_blocksize);
1101 1061
1102 /* init buddy cache */ 1062 if (e4b.bd_buddy_page == NULL) {
1103 block++;
1104 pnum = block / blocks_per_page;
1105 poff = block % blocks_per_page;
1106 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1107 if (page == bitmap_page) {
1108 /* 1063 /*
1109 * If both the bitmap and buddy are in 1064 * If both the bitmap and buddy are in
1110 * the same page we don't need to force 1065 * the same page we don't need to force
1111 * init the buddy 1066 * init the buddy
1112 */ 1067 */
1113 unlock_page(page); 1068 ret = 0;
1114 } else if (page) { 1069 goto err;
1115 BUG_ON(page->mapping != inode->i_mapping);
1116 ret = ext4_mb_init_cache(page, bitmap);
1117 if (ret) {
1118 unlock_page(page);
1119 goto err;
1120 }
1121 unlock_page(page);
1122 } 1070 }
1123 if (page == NULL || !PageUptodate(page)) { 1071 /* init buddy cache */
1072 page = e4b.bd_buddy_page;
1073 ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
1074 if (ret)
1075 goto err;
1076 if (!PageUptodate(page)) {
1124 ret = -EIO; 1077 ret = -EIO;
1125 goto err; 1078 goto err;
1126 } 1079 }
1127 mark_page_accessed(page); 1080 mark_page_accessed(page);
1128err: 1081err:
1129 ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); 1082 ext4_mb_put_buddy_page_lock(&e4b);
1130 if (bitmap_page)
1131 page_cache_release(bitmap_page);
1132 if (page)
1133 page_cache_release(page);
1134 return ret; 1083 return ret;
1135} 1084}
1136 1085