diff options
author | Frederic Bohe <frederic.bohe@bull.net> | 2008-07-11 19:27:31 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-07-11 19:27:31 -0400 |
commit | 5f21b0e642d7bf6fe4434c9ba12bc9cb96b17cf7 (patch) | |
tree | 0f391fac5cc7fa93129bf8dd853598c6d2d65bb5 | |
parent | 953e622b601f58b7cc0f29fe644457fa40a18456 (diff) |
ext4: fix online resize with mballoc
Update group infos when updating a group's descriptor.
Add group infos when adding a group's descriptor.
Refresh cache pages used by mb_alloc when changes occur.
This will probably need modifications when META_BG resizing will be allowed.
Signed-off-by: Frederic Bohe <frederic.bohe@bull.net>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
-rw-r--r-- | fs/ext4/ext4.h | 4 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 234 | ||||
-rw-r--r-- | fs/ext4/resize.c | 52 |
3 files changed, 234 insertions, 56 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2c4b48519c8b..64edb09c481e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1033,6 +1033,10 @@ extern int __init init_ext4_mballoc(void); | |||
1033 | extern void exit_ext4_mballoc(void); | 1033 | extern void exit_ext4_mballoc(void); |
1034 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, | 1034 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, |
1035 | unsigned long, unsigned long, int, unsigned long *); | 1035 | unsigned long, unsigned long, int, unsigned long *); |
1036 | extern int ext4_mb_add_more_groupinfo(struct super_block *sb, | ||
1037 | ext4_group_t i, struct ext4_group_desc *desc); | ||
1038 | extern void ext4_mb_update_group_info(struct ext4_group_info *grp, | ||
1039 | ext4_grpblk_t add); | ||
1036 | 1040 | ||
1037 | 1041 | ||
1038 | /* inode.c */ | 1042 | /* inode.c */ |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1666ac184e31..8d254ca83d9e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2236,21 +2236,192 @@ ext4_mb_store_history(struct ext4_allocation_context *ac) | |||
2236 | #define ext4_mb_history_init(sb) | 2236 | #define ext4_mb_history_init(sb) |
2237 | #endif | 2237 | #endif |
2238 | 2238 | ||
2239 | |||
2240 | /* Create and initialize ext4_group_info data for the given group. */ | ||
2241 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | ||
2242 | struct ext4_group_desc *desc) | ||
2243 | { | ||
2244 | int i, len; | ||
2245 | int metalen = 0; | ||
2246 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2247 | struct ext4_group_info **meta_group_info; | ||
2248 | |||
2249 | /* | ||
2250 | * First check if this group is the first of a reserved block. | ||
2251 | * If it's true, we have to allocate a new table of pointers | ||
2252 | * to ext4_group_info structures | ||
2253 | */ | ||
2254 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { | ||
2255 | metalen = sizeof(*meta_group_info) << | ||
2256 | EXT4_DESC_PER_BLOCK_BITS(sb); | ||
2257 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | ||
2258 | if (meta_group_info == NULL) { | ||
2259 | printk(KERN_ERR "EXT4-fs: can't allocate mem for a " | ||
2260 | "buddy group\n"); | ||
2261 | goto exit_meta_group_info; | ||
2262 | } | ||
2263 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = | ||
2264 | meta_group_info; | ||
2265 | } | ||
2266 | |||
2267 | /* | ||
2268 | * calculate needed size. if change bb_counters size, | ||
2269 | * don't forget about ext4_mb_generate_buddy() | ||
2270 | */ | ||
2271 | len = offsetof(typeof(**meta_group_info), | ||
2272 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2273 | |||
2274 | meta_group_info = | ||
2275 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | ||
2276 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | ||
2277 | |||
2278 | meta_group_info[i] = kzalloc(len, GFP_KERNEL); | ||
2279 | if (meta_group_info[i] == NULL) { | ||
2280 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | ||
2281 | goto exit_group_info; | ||
2282 | } | ||
2283 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | ||
2284 | &(meta_group_info[i]->bb_state)); | ||
2285 | |||
2286 | /* | ||
2287 | * initialize bb_free to be able to skip | ||
2288 | * empty groups without initialization | ||
2289 | */ | ||
2290 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
2291 | meta_group_info[i]->bb_free = | ||
2292 | ext4_free_blocks_after_init(sb, group, desc); | ||
2293 | } else { | ||
2294 | meta_group_info[i]->bb_free = | ||
2295 | le16_to_cpu(desc->bg_free_blocks_count); | ||
2296 | } | ||
2297 | |||
2298 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | ||
2299 | |||
2300 | #ifdef DOUBLE_CHECK | ||
2301 | { | ||
2302 | struct buffer_head *bh; | ||
2303 | meta_group_info[i]->bb_bitmap = | ||
2304 | kmalloc(sb->s_blocksize, GFP_KERNEL); | ||
2305 | BUG_ON(meta_group_info[i]->bb_bitmap == NULL); | ||
2306 | bh = ext4_read_block_bitmap(sb, group); | ||
2307 | BUG_ON(bh == NULL); | ||
2308 | memcpy(meta_group_info[i]->bb_bitmap, bh->b_data, | ||
2309 | sb->s_blocksize); | ||
2310 | put_bh(bh); | ||
2311 | } | ||
2312 | #endif | ||
2313 | |||
2314 | return 0; | ||
2315 | |||
2316 | exit_group_info: | ||
2317 | /* If a meta_group_info table has been allocated, release it now */ | ||
2318 | if (group % EXT4_DESC_PER_BLOCK(sb) == 0) | ||
2319 | kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); | ||
2320 | exit_meta_group_info: | ||
2321 | return -ENOMEM; | ||
2322 | } /* ext4_mb_add_groupinfo */ | ||
2323 | |||
2324 | /* | ||
2325 | * Add a group to the existing groups. | ||
2326 | * This function is used for online resize | ||
2327 | */ | ||
2328 | int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group, | ||
2329 | struct ext4_group_desc *desc) | ||
2330 | { | ||
2331 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2332 | struct inode *inode = sbi->s_buddy_cache; | ||
2333 | int blocks_per_page; | ||
2334 | int block; | ||
2335 | int pnum; | ||
2336 | struct page *page; | ||
2337 | int err; | ||
2338 | |||
2339 | /* Add group based on group descriptor*/ | ||
2340 | err = ext4_mb_add_groupinfo(sb, group, desc); | ||
2341 | if (err) | ||
2342 | return err; | ||
2343 | |||
2344 | /* | ||
2345 | * Cache pages containing dynamic mb_alloc datas (buddy and bitmap | ||
2346 | * datas) are set not up to date so that they will be re-initilaized | ||
2347 | * during the next call to ext4_mb_load_buddy | ||
2348 | */ | ||
2349 | |||
2350 | /* Set buddy page as not up to date */ | ||
2351 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
2352 | block = group * 2; | ||
2353 | pnum = block / blocks_per_page; | ||
2354 | page = find_get_page(inode->i_mapping, pnum); | ||
2355 | if (page != NULL) { | ||
2356 | ClearPageUptodate(page); | ||
2357 | page_cache_release(page); | ||
2358 | } | ||
2359 | |||
2360 | /* Set bitmap page as not up to date */ | ||
2361 | block++; | ||
2362 | pnum = block / blocks_per_page; | ||
2363 | page = find_get_page(inode->i_mapping, pnum); | ||
2364 | if (page != NULL) { | ||
2365 | ClearPageUptodate(page); | ||
2366 | page_cache_release(page); | ||
2367 | } | ||
2368 | |||
2369 | return 0; | ||
2370 | } | ||
2371 | |||
2372 | /* | ||
2373 | * Update an existing group. | ||
2374 | * This function is used for online resize | ||
2375 | */ | ||
2376 | void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) | ||
2377 | { | ||
2378 | grp->bb_free += add; | ||
2379 | } | ||
2380 | |||
2239 | static int ext4_mb_init_backend(struct super_block *sb) | 2381 | static int ext4_mb_init_backend(struct super_block *sb) |
2240 | { | 2382 | { |
2241 | ext4_group_t i; | 2383 | ext4_group_t i; |
2242 | int j, len, metalen; | 2384 | int metalen; |
2243 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2385 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2244 | int num_meta_group_infos = | 2386 | struct ext4_super_block *es = sbi->s_es; |
2245 | (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2387 | int num_meta_group_infos; |
2246 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2388 | int num_meta_group_infos_max; |
2389 | int array_size; | ||
2247 | struct ext4_group_info **meta_group_info; | 2390 | struct ext4_group_info **meta_group_info; |
2391 | struct ext4_group_desc *desc; | ||
2392 | |||
2393 | /* This is the number of blocks used by GDT */ | ||
2394 | num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - | ||
2395 | 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); | ||
2248 | 2396 | ||
2397 | /* | ||
2398 | * This is the total number of blocks used by GDT including | ||
2399 | * the number of reserved blocks for GDT. | ||
2400 | * The s_group_info array is allocated with this value | ||
2401 | * to allow a clean online resize without a complex | ||
2402 | * manipulation of pointer. | ||
2403 | * The drawback is the unused memory when no resize | ||
2404 | * occurs but it's very low in terms of pages | ||
2405 | * (see comments below) | ||
2406 | * Need to handle this properly when META_BG resizing is allowed | ||
2407 | */ | ||
2408 | num_meta_group_infos_max = num_meta_group_infos + | ||
2409 | le16_to_cpu(es->s_reserved_gdt_blocks); | ||
2410 | |||
2411 | /* | ||
2412 | * array_size is the size of s_group_info array. We round it | ||
2413 | * to the next power of two because this approximation is done | ||
2414 | * internally by kmalloc so we can have some more memory | ||
2415 | * for free here (e.g. may be used for META_BG resize). | ||
2416 | */ | ||
2417 | array_size = 1; | ||
2418 | while (array_size < sizeof(*sbi->s_group_info) * | ||
2419 | num_meta_group_infos_max) | ||
2420 | array_size = array_size << 1; | ||
2249 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | 2421 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte |
2250 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | 2422 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. |
2251 | * So a two level scheme suffices for now. */ | 2423 | * So a two level scheme suffices for now. */ |
2252 | sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * | 2424 | sbi->s_group_info = kmalloc(array_size, GFP_KERNEL); |
2253 | num_meta_group_infos, GFP_KERNEL); | ||
2254 | if (sbi->s_group_info == NULL) { | 2425 | if (sbi->s_group_info == NULL) { |
2255 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); | 2426 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); |
2256 | return -ENOMEM; | 2427 | return -ENOMEM; |
@@ -2277,62 +2448,15 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2277 | sbi->s_group_info[i] = meta_group_info; | 2448 | sbi->s_group_info[i] = meta_group_info; |
2278 | } | 2449 | } |
2279 | 2450 | ||
2280 | /* | ||
2281 | * calculate needed size. if change bb_counters size, | ||
2282 | * don't forget about ext4_mb_generate_buddy() | ||
2283 | */ | ||
2284 | len = sizeof(struct ext4_group_info); | ||
2285 | len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2); | ||
2286 | for (i = 0; i < sbi->s_groups_count; i++) { | 2451 | for (i = 0; i < sbi->s_groups_count; i++) { |
2287 | struct ext4_group_desc *desc; | ||
2288 | |||
2289 | meta_group_info = | ||
2290 | sbi->s_group_info[i >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | ||
2291 | j = i & (EXT4_DESC_PER_BLOCK(sb) - 1); | ||
2292 | |||
2293 | meta_group_info[j] = kzalloc(len, GFP_KERNEL); | ||
2294 | if (meta_group_info[j] == NULL) { | ||
2295 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | ||
2296 | goto err_freebuddy; | ||
2297 | } | ||
2298 | desc = ext4_get_group_desc(sb, i, NULL); | 2452 | desc = ext4_get_group_desc(sb, i, NULL); |
2299 | if (desc == NULL) { | 2453 | if (desc == NULL) { |
2300 | printk(KERN_ERR | 2454 | printk(KERN_ERR |
2301 | "EXT4-fs: can't read descriptor %lu\n", i); | 2455 | "EXT4-fs: can't read descriptor %lu\n", i); |
2302 | i++; | ||
2303 | goto err_freebuddy; | 2456 | goto err_freebuddy; |
2304 | } | 2457 | } |
2305 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | 2458 | if (ext4_mb_add_groupinfo(sb, i, desc) != 0) |
2306 | &(meta_group_info[j]->bb_state)); | 2459 | goto err_freebuddy; |
2307 | |||
2308 | /* | ||
2309 | * initialize bb_free to be able to skip | ||
2310 | * empty groups without initialization | ||
2311 | */ | ||
2312 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
2313 | meta_group_info[j]->bb_free = | ||
2314 | ext4_free_blocks_after_init(sb, i, desc); | ||
2315 | } else { | ||
2316 | meta_group_info[j]->bb_free = | ||
2317 | le16_to_cpu(desc->bg_free_blocks_count); | ||
2318 | } | ||
2319 | |||
2320 | INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list); | ||
2321 | |||
2322 | #ifdef DOUBLE_CHECK | ||
2323 | { | ||
2324 | struct buffer_head *bh; | ||
2325 | meta_group_info[j]->bb_bitmap = | ||
2326 | kmalloc(sb->s_blocksize, GFP_KERNEL); | ||
2327 | BUG_ON(meta_group_info[j]->bb_bitmap == NULL); | ||
2328 | bh = ext4_read_block_bitmap(sb, i); | ||
2329 | BUG_ON(bh == NULL); | ||
2330 | memcpy(meta_group_info[j]->bb_bitmap, bh->b_data, | ||
2331 | sb->s_blocksize); | ||
2332 | put_bh(bh); | ||
2333 | } | ||
2334 | #endif | ||
2335 | |||
2336 | } | 2460 | } |
2337 | 2461 | ||
2338 | return 0; | 2462 | return 0; |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9ff7b1c04239..f000fbe2cd93 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -866,6 +866,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
866 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); | 866 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); |
867 | 867 | ||
868 | /* | 868 | /* |
869 | * We can allocate memory for mb_alloc based on the new group | ||
870 | * descriptor | ||
871 | */ | ||
872 | if (test_opt(sb, MBALLOC)) { | ||
873 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); | ||
874 | if (err) | ||
875 | goto exit_journal; | ||
876 | } | ||
877 | /* | ||
869 | * Make the new blocks and inodes valid next. We do this before | 878 | * Make the new blocks and inodes valid next. We do this before |
870 | * increasing the group count so that once the group is enabled, | 879 | * increasing the group count so that once the group is enabled, |
871 | * all of its blocks and inodes are already valid. | 880 | * all of its blocks and inodes are already valid. |
@@ -957,6 +966,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
957 | handle_t *handle; | 966 | handle_t *handle; |
958 | int err; | 967 | int err; |
959 | unsigned long freed_blocks; | 968 | unsigned long freed_blocks; |
969 | ext4_group_t group; | ||
970 | struct ext4_group_info *grp; | ||
960 | 971 | ||
961 | /* We don't need to worry about locking wrt other resizers just | 972 | /* We don't need to worry about locking wrt other resizers just |
962 | * yet: we're going to revalidate es->s_blocks_count after | 973 | * yet: we're going to revalidate es->s_blocks_count after |
@@ -988,7 +999,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
988 | } | 999 | } |
989 | 1000 | ||
990 | /* Handle the remaining blocks in the last group only. */ | 1001 | /* Handle the remaining blocks in the last group only. */ |
991 | ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); | 1002 | ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); |
992 | 1003 | ||
993 | if (last == 0) { | 1004 | if (last == 0) { |
994 | ext4_warning(sb, __func__, | 1005 | ext4_warning(sb, __func__, |
@@ -1060,6 +1071,45 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1060 | o_blocks_count + add); | 1071 | o_blocks_count + add); |
1061 | if ((err = ext4_journal_stop(handle))) | 1072 | if ((err = ext4_journal_stop(handle))) |
1062 | goto exit_put; | 1073 | goto exit_put; |
1074 | |||
1075 | /* | ||
1076 | * Mark mballoc pages as not up to date so that they will be updated | ||
1077 | * next time they are loaded by ext4_mb_load_buddy. | ||
1078 | */ | ||
1079 | if (test_opt(sb, MBALLOC)) { | ||
1080 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1081 | struct inode *inode = sbi->s_buddy_cache; | ||
1082 | int blocks_per_page; | ||
1083 | int block; | ||
1084 | int pnum; | ||
1085 | struct page *page; | ||
1086 | |||
1087 | /* Set buddy page as not up to date */ | ||
1088 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1089 | block = group * 2; | ||
1090 | pnum = block / blocks_per_page; | ||
1091 | page = find_get_page(inode->i_mapping, pnum); | ||
1092 | if (page != NULL) { | ||
1093 | ClearPageUptodate(page); | ||
1094 | page_cache_release(page); | ||
1095 | } | ||
1096 | |||
1097 | /* Set bitmap page as not up to date */ | ||
1098 | block++; | ||
1099 | pnum = block / blocks_per_page; | ||
1100 | page = find_get_page(inode->i_mapping, pnum); | ||
1101 | if (page != NULL) { | ||
1102 | ClearPageUptodate(page); | ||
1103 | page_cache_release(page); | ||
1104 | } | ||
1105 | |||
1106 | /* Get the info on the last group */ | ||
1107 | grp = ext4_get_group_info(sb, group); | ||
1108 | |||
1109 | /* Update free blocks in group info */ | ||
1110 | ext4_mb_update_group_info(grp, add); | ||
1111 | } | ||
1112 | |||
1063 | if (test_opt(sb, DEBUG)) | 1113 | if (test_opt(sb, DEBUG)) |
1064 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", | 1114 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", |
1065 | ext4_blocks_count(es)); | 1115 | ext4_blocks_count(es)); |