diff options
author | Amir Goldstein <amir73il@users.sf.net> | 2011-05-09 21:48:13 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2011-05-09 21:48:13 -0400 |
commit | 2de8807b25de6d24476923121e3b20146fe8216b (patch) | |
tree | 083909f3bad86d16a1a65511771945b2ccbb917f /fs/ext4/mballoc.c | |
parent | e73a347b7723757bb5fb5c502814dc205a7f496d (diff) |
ext4: synchronize ext4_mb_init_group() with buddy page lock
The old routines ext4_mb_[get|put]_buddy_cache_lock(), which used
to take grp->alloc_sem for all groups on the buddy page, have been
replaced with the routines ext4_mb_[get|put]_buddy_page_lock().
The new routines take both buddy and bitmap page locks to protect
against concurrent init of groups on the same buddy page.
The GROUP_NEED_INIT flag is tested again under page lock to check
if the group was initialized by another caller.
Signed-off-by: Amir Goldstein <amir73il@users.sf.net>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 175 |
1 files changed, 62 insertions, 113 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 65329f148da5..7311f25a88ea 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -957,22 +957,21 @@ out: | |||
957 | } | 957 | } |
958 | 958 | ||
959 | /* | 959 | /* |
960 | * lock the group_info alloc_sem of all the groups | 960 | * Lock the buddy and bitmap pages. This make sure other parallel init_group |
961 | * belonging to the same buddy cache page. This | 961 | * on the same buddy page doesn't happen whild holding the buddy page lock. |
962 | * make sure other parallel operation on the buddy | 962 | * Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap |
963 | * cache doesn't happen whild holding the buddy cache | 963 | * are on the same page e4b->bd_buddy_page is NULL and return value is 0. |
964 | * lock | ||
965 | */ | 964 | */ |
966 | static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, | 965 | static int ext4_mb_get_buddy_page_lock(struct super_block *sb, |
967 | ext4_group_t group) | 966 | ext4_group_t group, struct ext4_buddy *e4b) |
968 | { | 967 | { |
969 | int i; | 968 | struct inode *inode = EXT4_SB(sb)->s_buddy_cache; |
970 | int block, pnum; | 969 | int block, pnum, poff; |
971 | int blocks_per_page; | 970 | int blocks_per_page; |
972 | int groups_per_page; | 971 | struct page *page; |
973 | ext4_group_t ngroups = ext4_get_groups_count(sb); | 972 | |
974 | ext4_group_t first_group; | 973 | e4b->bd_buddy_page = NULL; |
975 | struct ext4_group_info *grp; | 974 | e4b->bd_bitmap_page = NULL; |
976 | 975 | ||
977 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 976 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
978 | /* | 977 | /* |
@@ -982,57 +981,40 @@ static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, | |||
982 | */ | 981 | */ |
983 | block = group * 2; | 982 | block = group * 2; |
984 | pnum = block / blocks_per_page; | 983 | pnum = block / blocks_per_page; |
985 | first_group = pnum * blocks_per_page / 2; | 984 | poff = block % blocks_per_page; |
986 | 985 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | |
987 | groups_per_page = blocks_per_page >> 1; | 986 | if (!page) |
988 | if (groups_per_page == 0) | 987 | return -EIO; |
989 | groups_per_page = 1; | 988 | BUG_ON(page->mapping != inode->i_mapping); |
990 | /* read all groups the page covers into the cache */ | 989 | e4b->bd_bitmap_page = page; |
991 | for (i = 0; i < groups_per_page; i++) { | 990 | e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); |
992 | 991 | ||
993 | if ((first_group + i) >= ngroups) | 992 | if (blocks_per_page >= 2) { |
994 | break; | 993 | /* buddy and bitmap are on the same page */ |
995 | grp = ext4_get_group_info(sb, first_group + i); | 994 | return 0; |
996 | /* take all groups write allocation | ||
997 | * semaphore. This make sure there is | ||
998 | * no block allocation going on in any | ||
999 | * of that groups | ||
1000 | */ | ||
1001 | down_write_nested(&grp->alloc_sem, i); | ||
1002 | } | 995 | } |
1003 | return i; | 996 | |
997 | block++; | ||
998 | pnum = block / blocks_per_page; | ||
999 | poff = block % blocks_per_page; | ||
1000 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1001 | if (!page) | ||
1002 | return -EIO; | ||
1003 | BUG_ON(page->mapping != inode->i_mapping); | ||
1004 | e4b->bd_buddy_page = page; | ||
1005 | return 0; | ||
1004 | } | 1006 | } |
1005 | 1007 | ||
1006 | static void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | 1008 | static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b) |
1007 | ext4_group_t group, int locked_group) | ||
1008 | { | 1009 | { |
1009 | int i; | 1010 | if (e4b->bd_bitmap_page) { |
1010 | int block, pnum; | 1011 | unlock_page(e4b->bd_bitmap_page); |
1011 | int blocks_per_page; | 1012 | page_cache_release(e4b->bd_bitmap_page); |
1012 | ext4_group_t first_group; | 1013 | } |
1013 | struct ext4_group_info *grp; | 1014 | if (e4b->bd_buddy_page) { |
1014 | 1015 | unlock_page(e4b->bd_buddy_page); | |
1015 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 1016 | page_cache_release(e4b->bd_buddy_page); |
1016 | /* | ||
1017 | * the buddy cache inode stores the block bitmap | ||
1018 | * and buddy information in consecutive blocks. | ||
1019 | * So for each group we need two blocks. | ||
1020 | */ | ||
1021 | block = group * 2; | ||
1022 | pnum = block / blocks_per_page; | ||
1023 | first_group = pnum * blocks_per_page / 2; | ||
1024 | /* release locks on all the groups */ | ||
1025 | for (i = 0; i < locked_group; i++) { | ||
1026 | |||
1027 | grp = ext4_get_group_info(sb, first_group + i); | ||
1028 | /* take all groups write allocation | ||
1029 | * semaphore. This make sure there is | ||
1030 | * no block allocation going on in any | ||
1031 | * of that groups | ||
1032 | */ | ||
1033 | up_write(&grp->alloc_sem); | ||
1034 | } | 1017 | } |
1035 | |||
1036 | } | 1018 | } |
1037 | 1019 | ||
1038 | /* | 1020 | /* |
@@ -1044,93 +1026,60 @@ static noinline_for_stack | |||
1044 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | 1026 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) |
1045 | { | 1027 | { |
1046 | 1028 | ||
1047 | int ret = 0; | ||
1048 | void *bitmap; | ||
1049 | int blocks_per_page; | ||
1050 | int block, pnum, poff; | ||
1051 | int num_grp_locked = 0; | ||
1052 | struct ext4_group_info *this_grp; | 1029 | struct ext4_group_info *this_grp; |
1053 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1030 | struct ext4_buddy e4b; |
1054 | struct inode *inode = sbi->s_buddy_cache; | 1031 | struct page *page; |
1055 | struct page *page = NULL, *bitmap_page = NULL; | 1032 | int ret = 0; |
1056 | 1033 | ||
1057 | mb_debug(1, "init group %u\n", group); | 1034 | mb_debug(1, "init group %u\n", group); |
1058 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1059 | this_grp = ext4_get_group_info(sb, group); | 1035 | this_grp = ext4_get_group_info(sb, group); |
1060 | /* | 1036 | /* |
1061 | * This ensures that we don't reinit the buddy cache | 1037 | * This ensures that we don't reinit the buddy cache |
1062 | * page which map to the group from which we are already | 1038 | * page which map to the group from which we are already |
1063 | * allocating. If we are looking at the buddy cache we would | 1039 | * allocating. If we are looking at the buddy cache we would |
1064 | * have taken a reference using ext4_mb_load_buddy and that | 1040 | * have taken a reference using ext4_mb_load_buddy and that |
1065 | * would have taken the alloc_sem lock. | 1041 | * would have pinned buddy page to page cache. |
1066 | */ | 1042 | */ |
1067 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | 1043 | ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b); |
1068 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | 1044 | if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { |
1069 | /* | 1045 | /* |
1070 | * somebody initialized the group | 1046 | * somebody initialized the group |
1071 | * return without doing anything | 1047 | * return without doing anything |
1072 | */ | 1048 | */ |
1073 | ret = 0; | ||
1074 | goto err; | 1049 | goto err; |
1075 | } | 1050 | } |
1076 | /* | 1051 | |
1077 | * the buddy cache inode stores the block bitmap | 1052 | page = e4b.bd_bitmap_page; |
1078 | * and buddy information in consecutive blocks. | 1053 | ret = ext4_mb_init_cache(page, NULL); |
1079 | * So for each group we need two blocks. | 1054 | if (ret) |
1080 | */ | 1055 | goto err; |
1081 | block = group * 2; | 1056 | if (!PageUptodate(page)) { |
1082 | pnum = block / blocks_per_page; | ||
1083 | poff = block % blocks_per_page; | ||
1084 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1085 | if (page) { | ||
1086 | BUG_ON(page->mapping != inode->i_mapping); | ||
1087 | ret = ext4_mb_init_cache(page, NULL); | ||
1088 | if (ret) { | ||
1089 | unlock_page(page); | ||
1090 | goto err; | ||
1091 | } | ||
1092 | unlock_page(page); | ||
1093 | } | ||
1094 | if (page == NULL || !PageUptodate(page)) { | ||
1095 | ret = -EIO; | 1057 | ret = -EIO; |
1096 | goto err; | 1058 | goto err; |
1097 | } | 1059 | } |
1098 | mark_page_accessed(page); | 1060 | mark_page_accessed(page); |
1099 | bitmap_page = page; | ||
1100 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1101 | 1061 | ||
1102 | /* init buddy cache */ | 1062 | if (e4b.bd_buddy_page == NULL) { |
1103 | block++; | ||
1104 | pnum = block / blocks_per_page; | ||
1105 | poff = block % blocks_per_page; | ||
1106 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1107 | if (page == bitmap_page) { | ||
1108 | /* | 1063 | /* |
1109 | * If both the bitmap and buddy are in | 1064 | * If both the bitmap and buddy are in |
1110 | * the same page we don't need to force | 1065 | * the same page we don't need to force |
1111 | * init the buddy | 1066 | * init the buddy |
1112 | */ | 1067 | */ |
1113 | unlock_page(page); | 1068 | ret = 0; |
1114 | } else if (page) { | 1069 | goto err; |
1115 | BUG_ON(page->mapping != inode->i_mapping); | ||
1116 | ret = ext4_mb_init_cache(page, bitmap); | ||
1117 | if (ret) { | ||
1118 | unlock_page(page); | ||
1119 | goto err; | ||
1120 | } | ||
1121 | unlock_page(page); | ||
1122 | } | 1070 | } |
1123 | if (page == NULL || !PageUptodate(page)) { | 1071 | /* init buddy cache */ |
1072 | page = e4b.bd_buddy_page; | ||
1073 | ret = ext4_mb_init_cache(page, e4b.bd_bitmap); | ||
1074 | if (ret) | ||
1075 | goto err; | ||
1076 | if (!PageUptodate(page)) { | ||
1124 | ret = -EIO; | 1077 | ret = -EIO; |
1125 | goto err; | 1078 | goto err; |
1126 | } | 1079 | } |
1127 | mark_page_accessed(page); | 1080 | mark_page_accessed(page); |
1128 | err: | 1081 | err: |
1129 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | 1082 | ext4_mb_put_buddy_page_lock(&e4b); |
1130 | if (bitmap_page) | ||
1131 | page_cache_release(bitmap_page); | ||
1132 | if (page) | ||
1133 | page_cache_release(page); | ||
1134 | return ret; | 1083 | return ret; |
1135 | } | 1084 | } |
1136 | 1085 | ||