aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorFrederic Bohe <frederic.bohe@bull.net>2008-07-11 19:27:31 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-07-11 19:27:31 -0400
commit5f21b0e642d7bf6fe4434c9ba12bc9cb96b17cf7 (patch)
tree0f391fac5cc7fa93129bf8dd853598c6d2d65bb5 /fs
parent953e622b601f58b7cc0f29fe644457fa40a18456 (diff)
ext4: fix online resize with mballoc
Update group infos when updating a group's descriptor. Add group infos when adding a group's descriptor. Refresh cache pages used by mb_alloc when changes occur. This will probably need modifications when META_BG resizing will be allowed. Signed-off-by: Frederic Bohe <frederic.bohe@bull.net> Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/ext4.h4
-rw-r--r--fs/ext4/mballoc.c234
-rw-r--r--fs/ext4/resize.c52
3 files changed, 234 insertions, 56 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2c4b48519c8b..64edb09c481e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1033,6 +1033,10 @@ extern int __init init_ext4_mballoc(void);
1033extern void exit_ext4_mballoc(void); 1033extern void exit_ext4_mballoc(void);
1034extern void ext4_mb_free_blocks(handle_t *, struct inode *, 1034extern void ext4_mb_free_blocks(handle_t *, struct inode *,
1035 unsigned long, unsigned long, int, unsigned long *); 1035 unsigned long, unsigned long, int, unsigned long *);
1036extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
1037 ext4_group_t i, struct ext4_group_desc *desc);
1038extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
1039 ext4_grpblk_t add);
1036 1040
1037 1041
1038/* inode.c */ 1042/* inode.c */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1666ac184e31..8d254ca83d9e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2236,21 +2236,192 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
2236#define ext4_mb_history_init(sb) 2236#define ext4_mb_history_init(sb)
2237#endif 2237#endif
2238 2238
2239
2240/* Create and initialize ext4_group_info data for the given group. */
2241int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2242 struct ext4_group_desc *desc)
2243{
2244 int i, len;
2245 int metalen = 0;
2246 struct ext4_sb_info *sbi = EXT4_SB(sb);
2247 struct ext4_group_info **meta_group_info;
2248
2249 /*
2250 * First check if this group is the first of a reserved block.
2251 * If it's true, we have to allocate a new table of pointers
2252 * to ext4_group_info structures
2253 */
2254 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2255 metalen = sizeof(*meta_group_info) <<
2256 EXT4_DESC_PER_BLOCK_BITS(sb);
2257 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2258 if (meta_group_info == NULL) {
2259 printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
2260 "buddy group\n");
2261 goto exit_meta_group_info;
2262 }
2263 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
2264 meta_group_info;
2265 }
2266
2267 /*
2268 * calculate needed size. if change bb_counters size,
2269 * don't forget about ext4_mb_generate_buddy()
2270 */
2271 len = offsetof(typeof(**meta_group_info),
2272 bb_counters[sb->s_blocksize_bits + 2]);
2273
2274 meta_group_info =
2275 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2276 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2277
2278 meta_group_info[i] = kzalloc(len, GFP_KERNEL);
2279 if (meta_group_info[i] == NULL) {
2280 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2281 goto exit_group_info;
2282 }
2283 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2284 &(meta_group_info[i]->bb_state));
2285
2286 /*
2287 * initialize bb_free to be able to skip
2288 * empty groups without initialization
2289 */
2290 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2291 meta_group_info[i]->bb_free =
2292 ext4_free_blocks_after_init(sb, group, desc);
2293 } else {
2294 meta_group_info[i]->bb_free =
2295 le16_to_cpu(desc->bg_free_blocks_count);
2296 }
2297
2298 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2299
2300#ifdef DOUBLE_CHECK
2301 {
2302 struct buffer_head *bh;
2303 meta_group_info[i]->bb_bitmap =
2304 kmalloc(sb->s_blocksize, GFP_KERNEL);
2305 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2306 bh = ext4_read_block_bitmap(sb, group);
2307 BUG_ON(bh == NULL);
2308 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2309 sb->s_blocksize);
2310 put_bh(bh);
2311 }
2312#endif
2313
2314 return 0;
2315
2316exit_group_info:
2317 /* If a meta_group_info table has been allocated, release it now */
2318 if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
2319 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
2320exit_meta_group_info:
2321 return -ENOMEM;
2322} /* ext4_mb_add_groupinfo */
2323
2324/*
2325 * Add a group to the existing groups.
2326 * This function is used for online resize
2327 */
2328int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
2329 struct ext4_group_desc *desc)
2330{
2331 struct ext4_sb_info *sbi = EXT4_SB(sb);
2332 struct inode *inode = sbi->s_buddy_cache;
2333 int blocks_per_page;
2334 int block;
2335 int pnum;
2336 struct page *page;
2337 int err;
2338
2339 /* Add group based on group descriptor*/
2340 err = ext4_mb_add_groupinfo(sb, group, desc);
2341 if (err)
2342 return err;
2343
2344 /*
2345 * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
2346 * datas) are set not up to date so that they will be re-initilaized
2347 * during the next call to ext4_mb_load_buddy
2348 */
2349
2350 /* Set buddy page as not up to date */
2351 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
2352 block = group * 2;
2353 pnum = block / blocks_per_page;
2354 page = find_get_page(inode->i_mapping, pnum);
2355 if (page != NULL) {
2356 ClearPageUptodate(page);
2357 page_cache_release(page);
2358 }
2359
2360 /* Set bitmap page as not up to date */
2361 block++;
2362 pnum = block / blocks_per_page;
2363 page = find_get_page(inode->i_mapping, pnum);
2364 if (page != NULL) {
2365 ClearPageUptodate(page);
2366 page_cache_release(page);
2367 }
2368
2369 return 0;
2370}
2371
2372/*
2373 * Update an existing group.
2374 * This function is used for online resize
2375 */
2376void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
2377{
2378 grp->bb_free += add;
2379}
2380
2239static int ext4_mb_init_backend(struct super_block *sb) 2381static int ext4_mb_init_backend(struct super_block *sb)
2240{ 2382{
2241 ext4_group_t i; 2383 ext4_group_t i;
2242 int j, len, metalen; 2384 int metalen;
2243 struct ext4_sb_info *sbi = EXT4_SB(sb); 2385 struct ext4_sb_info *sbi = EXT4_SB(sb);
2244 int num_meta_group_infos = 2386 struct ext4_super_block *es = sbi->s_es;
2245 (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) >> 2387 int num_meta_group_infos;
2246 EXT4_DESC_PER_BLOCK_BITS(sb); 2388 int num_meta_group_infos_max;
2389 int array_size;
2247 struct ext4_group_info **meta_group_info; 2390 struct ext4_group_info **meta_group_info;
2391 struct ext4_group_desc *desc;
2392
2393 /* This is the number of blocks used by GDT */
2394 num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
2395 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
2248 2396
2397 /*
2398 * This is the total number of blocks used by GDT including
2399 * the number of reserved blocks for GDT.
2400 * The s_group_info array is allocated with this value
2401 * to allow a clean online resize without a complex
2402 * manipulation of pointer.
2403 * The drawback is the unused memory when no resize
2404 * occurs but it's very low in terms of pages
2405 * (see comments below)
2406 * Need to handle this properly when META_BG resizing is allowed
2407 */
2408 num_meta_group_infos_max = num_meta_group_infos +
2409 le16_to_cpu(es->s_reserved_gdt_blocks);
2410
2411 /*
2412 * array_size is the size of s_group_info array. We round it
2413 * to the next power of two because this approximation is done
2414 * internally by kmalloc so we can have some more memory
2415 * for free here (e.g. may be used for META_BG resize).
2416 */
2417 array_size = 1;
2418 while (array_size < sizeof(*sbi->s_group_info) *
2419 num_meta_group_infos_max)
2420 array_size = array_size << 1;
2249 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte 2421 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
2250 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. 2422 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
2251 * So a two level scheme suffices for now. */ 2423 * So a two level scheme suffices for now. */
2252 sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * 2424 sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
2253 num_meta_group_infos, GFP_KERNEL);
2254 if (sbi->s_group_info == NULL) { 2425 if (sbi->s_group_info == NULL) {
2255 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); 2426 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
2256 return -ENOMEM; 2427 return -ENOMEM;
@@ -2277,62 +2448,15 @@ static int ext4_mb_init_backend(struct super_block *sb)
2277 sbi->s_group_info[i] = meta_group_info; 2448 sbi->s_group_info[i] = meta_group_info;
2278 } 2449 }
2279 2450
2280 /*
2281 * calculate needed size. if change bb_counters size,
2282 * don't forget about ext4_mb_generate_buddy()
2283 */
2284 len = sizeof(struct ext4_group_info);
2285 len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2);
2286 for (i = 0; i < sbi->s_groups_count; i++) { 2451 for (i = 0; i < sbi->s_groups_count; i++) {
2287 struct ext4_group_desc *desc;
2288
2289 meta_group_info =
2290 sbi->s_group_info[i >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2291 j = i & (EXT4_DESC_PER_BLOCK(sb) - 1);
2292
2293 meta_group_info[j] = kzalloc(len, GFP_KERNEL);
2294 if (meta_group_info[j] == NULL) {
2295 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2296 goto err_freebuddy;
2297 }
2298 desc = ext4_get_group_desc(sb, i, NULL); 2452 desc = ext4_get_group_desc(sb, i, NULL);
2299 if (desc == NULL) { 2453 if (desc == NULL) {
2300 printk(KERN_ERR 2454 printk(KERN_ERR
2301 "EXT4-fs: can't read descriptor %lu\n", i); 2455 "EXT4-fs: can't read descriptor %lu\n", i);
2302 i++;
2303 goto err_freebuddy; 2456 goto err_freebuddy;
2304 } 2457 }
2305 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, 2458 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
2306 &(meta_group_info[j]->bb_state)); 2459 goto err_freebuddy;
2307
2308 /*
2309 * initialize bb_free to be able to skip
2310 * empty groups without initialization
2311 */
2312 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2313 meta_group_info[j]->bb_free =
2314 ext4_free_blocks_after_init(sb, i, desc);
2315 } else {
2316 meta_group_info[j]->bb_free =
2317 le16_to_cpu(desc->bg_free_blocks_count);
2318 }
2319
2320 INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list);
2321
2322#ifdef DOUBLE_CHECK
2323 {
2324 struct buffer_head *bh;
2325 meta_group_info[j]->bb_bitmap =
2326 kmalloc(sb->s_blocksize, GFP_KERNEL);
2327 BUG_ON(meta_group_info[j]->bb_bitmap == NULL);
2328 bh = ext4_read_block_bitmap(sb, i);
2329 BUG_ON(bh == NULL);
2330 memcpy(meta_group_info[j]->bb_bitmap, bh->b_data,
2331 sb->s_blocksize);
2332 put_bh(bh);
2333 }
2334#endif
2335
2336 } 2460 }
2337 2461
2338 return 0; 2462 return 0;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9ff7b1c04239..f000fbe2cd93 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -866,6 +866,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
866 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); 866 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
867 867
868 /* 868 /*
869 * We can allocate memory for mb_alloc based on the new group
870 * descriptor
871 */
872 if (test_opt(sb, MBALLOC)) {
873 err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
874 if (err)
875 goto exit_journal;
876 }
877 /*
869 * Make the new blocks and inodes valid next. We do this before 878 * Make the new blocks and inodes valid next. We do this before
870 * increasing the group count so that once the group is enabled, 879 * increasing the group count so that once the group is enabled,
871 * all of its blocks and inodes are already valid. 880 * all of its blocks and inodes are already valid.
@@ -957,6 +966,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
957 handle_t *handle; 966 handle_t *handle;
958 int err; 967 int err;
959 unsigned long freed_blocks; 968 unsigned long freed_blocks;
969 ext4_group_t group;
970 struct ext4_group_info *grp;
960 971
961 /* We don't need to worry about locking wrt other resizers just 972 /* We don't need to worry about locking wrt other resizers just
962 * yet: we're going to revalidate es->s_blocks_count after 973 * yet: we're going to revalidate es->s_blocks_count after
@@ -988,7 +999,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
988 } 999 }
989 1000
990 /* Handle the remaining blocks in the last group only. */ 1001 /* Handle the remaining blocks in the last group only. */
991 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); 1002 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
992 1003
993 if (last == 0) { 1004 if (last == 0) {
994 ext4_warning(sb, __func__, 1005 ext4_warning(sb, __func__,
@@ -1060,6 +1071,45 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1060 o_blocks_count + add); 1071 o_blocks_count + add);
1061 if ((err = ext4_journal_stop(handle))) 1072 if ((err = ext4_journal_stop(handle)))
1062 goto exit_put; 1073 goto exit_put;
1074
1075 /*
1076 * Mark mballoc pages as not up to date so that they will be updated
1077 * next time they are loaded by ext4_mb_load_buddy.
1078 */
1079 if (test_opt(sb, MBALLOC)) {
1080 struct ext4_sb_info *sbi = EXT4_SB(sb);
1081 struct inode *inode = sbi->s_buddy_cache;
1082 int blocks_per_page;
1083 int block;
1084 int pnum;
1085 struct page *page;
1086
1087 /* Set buddy page as not up to date */
1088 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1089 block = group * 2;
1090 pnum = block / blocks_per_page;
1091 page = find_get_page(inode->i_mapping, pnum);
1092 if (page != NULL) {
1093 ClearPageUptodate(page);
1094 page_cache_release(page);
1095 }
1096
1097 /* Set bitmap page as not up to date */
1098 block++;
1099 pnum = block / blocks_per_page;
1100 page = find_get_page(inode->i_mapping, pnum);
1101 if (page != NULL) {
1102 ClearPageUptodate(page);
1103 page_cache_release(page);
1104 }
1105
1106 /* Get the info on the last group */
1107 grp = ext4_get_group_info(sb, group);
1108
1109 /* Update free blocks in group info */
1110 ext4_mb_update_group_info(grp, add);
1111 }
1112
1063 if (test_opt(sb, DEBUG)) 1113 if (test_opt(sb, DEBUG))
1064 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", 1114 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
1065 ext4_blocks_count(es)); 1115 ext4_blocks_count(es));