aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Theodore Ts'o <tytso@mit.edu> 2012-09-05 01:31:50 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2012-09-05 01:31:50 -0400
commit: 28623c2f5b0dca3c3ea34fd6108940661352e276 (patch)
tree: ec5bf1df32709f0ebf359ce05b16f8ef608d2e9e /fs
parent: 117fff10d7f140e12dd43df20d3f9fda80577460 (diff)
ext4: grow the s_group_info array as needed
Previously we allocated the s_group_info array with enough space for any future possible growth of the file system via online resize. This is unfortunate because it wastes memory, and it doesn't work for the meta_bg scheme, since there is no limit based on the number of reserved gdt blocks.

So add the code to grow the s_group_info array as needed.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ext4/ext4.h 3
-rw-r--r-- fs/ext4/mballoc.c 79
-rw-r--r-- fs/ext4/resize.c 8
3 files changed, 50 insertions, 40 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 464cff711ed6..8b6902c4d7be 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1233,6 +1233,7 @@ struct ext4_sb_info {
1233 spinlock_t s_md_lock; 1233 spinlock_t s_md_lock;
1234 unsigned short *s_mb_offsets; 1234 unsigned short *s_mb_offsets;
1235 unsigned int *s_mb_maxs; 1235 unsigned int *s_mb_maxs;
1236 unsigned int s_group_info_size;
1236 1237
1237 /* tunables */ 1238 /* tunables */
1238 unsigned long s_stripe; 1239 unsigned long s_stripe;
@@ -1971,6 +1972,8 @@ extern void ext4_exit_mballoc(void);
1971extern void ext4_free_blocks(handle_t *handle, struct inode *inode, 1972extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1972 struct buffer_head *bh, ext4_fsblk_t block, 1973 struct buffer_head *bh, ext4_fsblk_t block,
1973 unsigned long count, int flags); 1974 unsigned long count, int flags);
1975extern int ext4_mb_alloc_groupinfo(struct super_block *sb,
1976 ext4_group_t ngroups);
1974extern int ext4_mb_add_groupinfo(struct super_block *sb, 1977extern int ext4_mb_add_groupinfo(struct super_block *sb,
1975 ext4_group_t i, struct ext4_group_desc *desc); 1978 ext4_group_t i, struct ext4_group_desc *desc);
1976extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, 1979extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 6873571c9f44..2102c20f7e98 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -24,6 +24,7 @@
24#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
25#include "mballoc.h" 25#include "mballoc.h"
26#include <linux/debugfs.h> 26#include <linux/debugfs.h>
27#include <linux/log2.h>
27#include <linux/slab.h> 28#include <linux/slab.h>
28#include <trace/events/ext4.h> 29#include <trace/events/ext4.h>
29 30
@@ -2163,6 +2164,39 @@ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2163 return cachep; 2164 return cachep;
2164} 2165}
2165 2166
2167/*
2168 * Allocate, or grow, the top-level s_group_info array so it has enough slots for
2169 * ngroups groups. Returns 0 on success (or if no growth is needed), -ENOMEM otherwise.
2170 */
2171int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
2172{
2173 struct ext4_sb_info *sbi = EXT4_SB(sb);
2174 unsigned size;
2175 struct ext4_group_info ***new_groupinfo;
2176
2177 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
2178 EXT4_DESC_PER_BLOCK_BITS(sb); /* slots needed: round-up division, assuming EXT4_DESC_PER_BLOCK is 1 << EXT4_DESC_PER_BLOCK_BITS -- TODO confirm */
2179 if (size <= sbi->s_group_info_size) /* existing array already has enough slots */
2180 return 0;
2181
2182 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size); /* size is now a byte count */
2183 new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
2184 if (!new_groupinfo) {
2185 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2186 return -ENOMEM; /* the old array, if any, is left in place untouched */
2187 }
2188 if (sbi->s_group_info) {
2189 memcpy(new_groupinfo, sbi->s_group_info,
2190 sbi->s_group_info_size * sizeof(*sbi->s_group_info)); /* carry the old slot pointers over */
2191 ext4_kvfree(sbi->s_group_info);
2192 }
2193 sbi->s_group_info = new_groupinfo;
2194 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); /* bytes back to a slot count */
2195 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
2196 sbi->s_group_info_size); /* NOTE(review): s_group_info_size is unsigned int; %u would match the type */
2197 return 0;
2198}
2199
2166/* Create and initialize ext4_group_info data for the given group. */ 2200/* Create and initialize ext4_group_info data for the given group. */
2167int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, 2201int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2168 struct ext4_group_desc *desc) 2202 struct ext4_group_desc *desc)
@@ -2252,49 +2286,14 @@ static int ext4_mb_init_backend(struct super_block *sb)
2252 ext4_group_t ngroups = ext4_get_groups_count(sb); 2286 ext4_group_t ngroups = ext4_get_groups_count(sb);
2253 ext4_group_t i; 2287 ext4_group_t i;
2254 struct ext4_sb_info *sbi = EXT4_SB(sb); 2288 struct ext4_sb_info *sbi = EXT4_SB(sb);
2255 struct ext4_super_block *es = sbi->s_es; 2289 int err;
2256 int num_meta_group_infos;
2257 int num_meta_group_infos_max;
2258 int array_size;
2259 struct ext4_group_desc *desc; 2290 struct ext4_group_desc *desc;
2260 struct kmem_cache *cachep; 2291 struct kmem_cache *cachep;
2261 2292
2262 /* This is the number of blocks used by GDT */ 2293 err = ext4_mb_alloc_groupinfo(sb, ngroups);
2263 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 2294 if (err)
2264 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); 2295 return err;
2265
2266 /*
2267 * This is the total number of blocks used by GDT including
2268 * the number of reserved blocks for GDT.
2269 * The s_group_info array is allocated with this value
2270 * to allow a clean online resize without a complex
2271 * manipulation of pointer.
2272 * The drawback is the unused memory when no resize
2273 * occurs but it's very low in terms of pages
2274 * (see comments below)
2275 * Need to handle this properly when META_BG resizing is allowed
2276 */
2277 num_meta_group_infos_max = num_meta_group_infos +
2278 le16_to_cpu(es->s_reserved_gdt_blocks);
2279 2296
2280 /*
2281 * array_size is the size of s_group_info array. We round it
2282 * to the next power of two because this approximation is done
2283 * internally by kmalloc so we can have some more memory
2284 * for free here (e.g. may be used for META_BG resize).
2285 */
2286 array_size = 1;
2287 while (array_size < sizeof(*sbi->s_group_info) *
2288 num_meta_group_infos_max)
2289 array_size = array_size << 1;
2290 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
2291 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
2292 * So a two level scheme suffices for now. */
2293 sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
2294 if (sbi->s_group_info == NULL) {
2295 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2296 return -ENOMEM;
2297 }
2298 sbi->s_buddy_cache = new_inode(sb); 2297 sbi->s_buddy_cache = new_inode(sb);
2299 if (sbi->s_buddy_cache == NULL) { 2298 if (sbi->s_buddy_cache == NULL) {
2300 ext4_msg(sb, KERN_ERR, "can't get new inode"); 2299 ext4_msg(sb, KERN_ERR, "can't get new inode");
@@ -2322,7 +2321,7 @@ err_freebuddy:
2322 cachep = get_groupinfo_cache(sb->s_blocksize_bits); 2321 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2323 while (i-- > 0) 2322 while (i-- > 0)
2324 kmem_cache_free(cachep, ext4_get_group_info(sb, i)); 2323 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2325 i = num_meta_group_infos; 2324 i = sbi->s_group_info_size;
2326 while (i-- > 0) 2325 while (i-- > 0)
2327 kfree(sbi->s_group_info[i]); 2326 kfree(sbi->s_group_info[i]);
2328 iput(sbi->s_buddy_cache); 2327 iput(sbi->s_buddy_cache);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3f5c67bf13a2..f288933bf4c0 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1507,6 +1507,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
1507 if (err) 1507 if (err)
1508 return err; 1508 return err;
1509 1509
1510 err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
1511 if (err)
1512 goto out;
1513
1510 flex_gd.count = 1; 1514 flex_gd.count = 1;
1511 flex_gd.groups = input; 1515 flex_gd.groups = input;
1512 flex_gd.bg_flags = &bg_flags; 1516 flex_gd.bg_flags = &bg_flags;
@@ -1732,6 +1736,10 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1732 if (err) 1736 if (err)
1733 return err; 1737 return err;
1734 1738
1739 err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
1740 if (err)
1741 goto out;
1742
1735 flex_gd = alloc_flex_gd(flexbg_size); 1743 flex_gd = alloc_flex_gd(flexbg_size);
1736 if (flex_gd == NULL) { 1744 if (flex_gd == NULL) {
1737 err = -ENOMEM; 1745 err = -ENOMEM;