Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--  fs/ext4/mballoc.c  451
1 files changed, 317 insertions, 134 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c9900aade150..8d141a25bbee 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -381,22 +381,28 @@ static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
 
 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 {
-        int fix = 0;
+        int fix = 0, ret, tmpmax;
         addr = mb_correct_addr_and_bit(&fix, addr);
-        max += fix;
+        tmpmax = max + fix;
         start += fix;
 
-        return ext4_find_next_zero_bit(addr, max, start) - fix;
+        ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
+        if (ret > max)
+                return max;
+        return ret;
 }
 
 static inline int mb_find_next_bit(void *addr, int max, int start)
 {
-        int fix = 0;
+        int fix = 0, ret, tmpmax;
         addr = mb_correct_addr_and_bit(&fix, addr);
-        max += fix;
+        tmpmax = max + fix;
         start += fix;
 
-        return ext4_find_next_bit(addr, max, start) - fix;
+        ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
+        if (ret > max)
+                return max;
+        return ret;
 }
 
 static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
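The two helpers above widen the search window by the alignment fixup in fix and then cap the result at the caller's max; callers treat a return value equal to max as "nothing found". Below is a minimal userspace sketch of that shape, assuming a deliberately sloppy stand-in for ext4_find_next_zero_bit() that can report a bit past the requested size (the real helper's tail handling is architecture-specific, so the stub is only an illustration of why the clamp is defensive). The same clamping applies to mb_find_next_bit().

#include <assert.h>

/* Hypothetical stand-in for the low-level search: scans byte at a time and
 * does not mask the tail, so it can report a zero bit past "size". */
static int find_next_zero_bit_stub(const unsigned char *addr, int size, int start)
{
        int byte, bit;

        for (byte = start / 8; byte * 8 < size; byte++) {
                if (addr[byte] == 0xff)
                        continue;
                for (bit = 0; bit < 8; bit++) {
                        int pos = byte * 8 + bit;

                        if (pos >= start && !(addr[byte] & (1 << bit)))
                                return pos;     /* may be >= size */
                }
        }
        return size;
}

/* Mirrors the shape of the patched mb_find_next_zero_bit(). */
static int mb_find_next_zero_bit_sketch(const unsigned char *addr, int max,
                                        int start, int fix)
{
        int tmpmax = max + fix; /* window widened by the alignment fixup */
        int ret = find_next_zero_bit_stub(addr, tmpmax, start + fix) - fix;

        if (ret > max)          /* never report a bit past the caller's range */
                return max;
        return ret;
}

int main(void)
{
        /* Bits 0-13 set, bits 14 and 15 clear. */
        unsigned char bitmap[2] = { 0xff, 0x3f };

        /* Caller's range is 8 bits; the fixup adds 4 more.  Unclamped, the
         * stub would report bit 10 (14 - fix), past the caller's max. */
        assert(mb_find_next_zero_bit_sketch(bitmap, 8, 0, 4) == 8);
        return 0;
}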
@@ -803,6 +809,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                 if (!buffer_uptodate(bh[i]))
                         goto out;
 
+        err = 0;
         first_block = page->index * blocks_per_page;
         for (i = 0; i < blocks_per_page; i++) {
                 int group;
@@ -883,6 +890,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
         int pnum;
         int poff;
         struct page *page;
+        int ret;
 
         mb_debug("load group %lu\n", group);
 
@@ -914,15 +922,21 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                 if (page) {
                         BUG_ON(page->mapping != inode->i_mapping);
                         if (!PageUptodate(page)) {
-                                ext4_mb_init_cache(page, NULL);
+                                ret = ext4_mb_init_cache(page, NULL);
+                                if (ret) {
+                                        unlock_page(page);
+                                        goto err;
+                                }
                                 mb_cmp_bitmaps(e4b, page_address(page) +
                                                (poff * sb->s_blocksize));
                         }
                         unlock_page(page);
                 }
         }
-        if (page == NULL || !PageUptodate(page))
+        if (page == NULL || !PageUptodate(page)) {
+                ret = -EIO;
                 goto err;
+        }
         e4b->bd_bitmap_page = page;
         e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
         mark_page_accessed(page);
@@ -938,14 +952,20 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
                 if (page) {
                         BUG_ON(page->mapping != inode->i_mapping);
-                        if (!PageUptodate(page))
-                                ext4_mb_init_cache(page, e4b->bd_bitmap);
-
+                        if (!PageUptodate(page)) {
+                                ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+                                if (ret) {
+                                        unlock_page(page);
+                                        goto err;
+                                }
+                        }
                         unlock_page(page);
                 }
         }
-        if (page == NULL || !PageUptodate(page))
+        if (page == NULL || !PageUptodate(page)) {
+                ret = -EIO;
                 goto err;
+        }
         e4b->bd_buddy_page = page;
         e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
         mark_page_accessed(page);
@@ -962,7 +982,7 @@ err:
                 page_cache_release(e4b->bd_buddy_page);
         e4b->bd_buddy = NULL;
         e4b->bd_bitmap = NULL;
-        return -EIO;
+        return ret;
 }
 
 static void ext4_mb_release_desc(struct ext4_buddy *e4b)
@@ -1031,7 +1051,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
         }
 }
 
-static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
+static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                           int first, int count)
 {
         int block = 0;
@@ -1071,11 +1091,12 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                         blocknr += block;
                         blocknr +=
                             le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-
+                        ext4_unlock_group(sb, e4b->bd_group);
                         ext4_error(sb, __func__, "double-free of inode"
                                    " %lu's block %llu(bit %u in group %lu)\n",
                                    inode ? inode->i_ino : 0, blocknr, block,
                                    e4b->bd_group);
+                        ext4_lock_group(sb, e4b->bd_group);
                 }
                 mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
                 e4b->bd_info->bb_counters[order]++;
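The hunk above drops the group lock around ext4_error() and re-acquires it afterwards, because the error path can do work that must not run under a spinlock (for example when the filesystem is mounted with errors=remount-ro or errors=panic). A self-contained sketch of that unlock/report/relock pattern follows, with a pthread mutex standing in for the group lock and a hypothetical report_double_free() standing in for ext4_error().

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;

/* Hypothetical stand-in for ext4_error(): may block or take other locks,
 * so it must not be called with group_lock held. */
static void report_double_free(unsigned long block)
{
        fprintf(stderr, "double-free of block %lu\n", block);
}

static void free_block(unsigned long block, int already_free)
{
        pthread_mutex_lock(&group_lock);
        if (already_free) {
                /* unlock, report, relock -- same shape as the hunk above */
                pthread_mutex_unlock(&group_lock);
                report_double_free(block);
                pthread_mutex_lock(&group_lock);
        }
        /* ...clear the bit and update the buddy counters here... */
        pthread_mutex_unlock(&group_lock);
}

int main(void)
{
        free_block(1234, 1);
        return 0;
}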
@@ -1113,8 +1134,6 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                 } while (1);
         }
         mb_check_buddy(e4b);
-
-        return 0;
 }
 
 static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
@@ -1730,10 +1749,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
                 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
                 spin_unlock(&sbi->s_md_lock);
         }
-
-        /* searching for the right group start from the goal value specified */
-        group = ac->ac_g_ex.fe_group;
-
         /* Let's just scan groups to find more-less suitable blocks */
         cr = ac->ac_2order ? 0 : 1;
         /*
@@ -1743,6 +1758,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 repeat:
         for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
                 ac->ac_criteria = cr;
+                /*
+                 * searching for the right group start
+                 * from the goal value specified
+                 */
+                group = ac->ac_g_ex.fe_group;
+
                 for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
                         struct ext4_group_info *grp;
                         struct ext4_group_desc *desc;
@@ -1963,6 +1984,8 @@ static int ext4_mb_seq_history_open(struct inode *inode, struct file *file)
         int rc;
         int size;
 
+        if (unlikely(sbi->s_mb_history == NULL))
+                return -ENOMEM;
         s = kmalloc(sizeof(*s), GFP_KERNEL);
         if (s == NULL)
                 return -ENOMEM;
@@ -2165,9 +2188,7 @@ static void ext4_mb_history_init(struct super_block *sb)
         sbi->s_mb_history_cur = 0;
         spin_lock_init(&sbi->s_mb_history_lock);
         i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
-        sbi->s_mb_history = kmalloc(i, GFP_KERNEL);
-        if (likely(sbi->s_mb_history != NULL))
-                memset(sbi->s_mb_history, 0, i);
+        sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
         /* if we can't allocate history, then we simple won't use it */
 }
 
@@ -2215,21 +2236,192 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
 #define ext4_mb_history_init(sb)
 #endif
 
+
+/* Create and initialize ext4_group_info data for the given group. */
+int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
+                          struct ext4_group_desc *desc)
+{
+        int i, len;
+        int metalen = 0;
+        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        struct ext4_group_info **meta_group_info;
+
+        /*
+         * First check if this group is the first of a reserved block.
+         * If it's true, we have to allocate a new table of pointers
+         * to ext4_group_info structures
+         */
+        if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
+                metalen = sizeof(*meta_group_info) <<
+                        EXT4_DESC_PER_BLOCK_BITS(sb);
+                meta_group_info = kmalloc(metalen, GFP_KERNEL);
+                if (meta_group_info == NULL) {
+                        printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
+                               "buddy group\n");
+                        goto exit_meta_group_info;
+                }
+                sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
+                        meta_group_info;
+        }
+
+        /*
+         * calculate needed size. if change bb_counters size,
+         * don't forget about ext4_mb_generate_buddy()
+         */
+        len = offsetof(typeof(**meta_group_info),
+                       bb_counters[sb->s_blocksize_bits + 2]);
+
+        meta_group_info =
+                sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
+        i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
+
+        meta_group_info[i] = kzalloc(len, GFP_KERNEL);
+        if (meta_group_info[i] == NULL) {
+                printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
+                goto exit_group_info;
+        }
+        set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
+                &(meta_group_info[i]->bb_state));
+
+        /*
+         * initialize bb_free to be able to skip
+         * empty groups without initialization
+         */
+        if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+                meta_group_info[i]->bb_free =
+                        ext4_free_blocks_after_init(sb, group, desc);
+        } else {
+                meta_group_info[i]->bb_free =
+                        le16_to_cpu(desc->bg_free_blocks_count);
+        }
+
+        INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+
+#ifdef DOUBLE_CHECK
+        {
+                struct buffer_head *bh;
+                meta_group_info[i]->bb_bitmap =
+                        kmalloc(sb->s_blocksize, GFP_KERNEL);
+                BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
+                bh = ext4_read_block_bitmap(sb, group);
+                BUG_ON(bh == NULL);
+                memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
+                       sb->s_blocksize);
+                put_bh(bh);
+        }
+#endif
+
+        return 0;
+
+exit_group_info:
+        /* If a meta_group_info table has been allocated, release it now */
+        if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
+                kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
+exit_meta_group_info:
+        return -ENOMEM;
+} /* ext4_mb_add_groupinfo */
+
+/*
+ * Add a group to the existing groups.
+ * This function is used for online resize
+ */
+int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
+                               struct ext4_group_desc *desc)
+{
+        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        struct inode *inode = sbi->s_buddy_cache;
+        int blocks_per_page;
+        int block;
+        int pnum;
+        struct page *page;
+        int err;
+
+        /* Add group based on group descriptor*/
+        err = ext4_mb_add_groupinfo(sb, group, desc);
+        if (err)
+                return err;
+
+        /*
+         * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
+         * datas) are set not up to date so that they will be re-initilaized
+         * during the next call to ext4_mb_load_buddy
+         */
+
+        /* Set buddy page as not up to date */
+        blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+        block = group * 2;
+        pnum = block / blocks_per_page;
+        page = find_get_page(inode->i_mapping, pnum);
+        if (page != NULL) {
+                ClearPageUptodate(page);
+                page_cache_release(page);
+        }
+
+        /* Set bitmap page as not up to date */
+        block++;
+        pnum = block / blocks_per_page;
+        page = find_get_page(inode->i_mapping, pnum);
+        if (page != NULL) {
+                ClearPageUptodate(page);
+                page_cache_release(page);
+        }
+
+        return 0;
+}
+
+/*
+ * Update an existing group.
+ * This function is used for online resize
+ */
+void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
+{
+        grp->bb_free += add;
+}
+
 static int ext4_mb_init_backend(struct super_block *sb)
 {
         ext4_group_t i;
-        int j, len, metalen;
+        int metalen;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
-        int num_meta_group_infos =
-                (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) >>
-                EXT4_DESC_PER_BLOCK_BITS(sb);
+        struct ext4_super_block *es = sbi->s_es;
+        int num_meta_group_infos;
+        int num_meta_group_infos_max;
+        int array_size;
         struct ext4_group_info **meta_group_info;
+        struct ext4_group_desc *desc;
+
+        /* This is the number of blocks used by GDT */
+        num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
+                                1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
+
+        /*
+         * This is the total number of blocks used by GDT including
+         * the number of reserved blocks for GDT.
+         * The s_group_info array is allocated with this value
+         * to allow a clean online resize without a complex
+         * manipulation of pointer.
+         * The drawback is the unused memory when no resize
+         * occurs but it's very low in terms of pages
+         * (see comments below)
+         * Need to handle this properly when META_BG resizing is allowed
+         */
+        num_meta_group_infos_max = num_meta_group_infos +
+                le16_to_cpu(es->s_reserved_gdt_blocks);
 
+        /*
+         * array_size is the size of s_group_info array. We round it
+         * to the next power of two because this approximation is done
+         * internally by kmalloc so we can have some more memory
+         * for free here (e.g. may be used for META_BG resize).
+         */
+        array_size = 1;
+        while (array_size < sizeof(*sbi->s_group_info) *
+               num_meta_group_infos_max)
+                array_size = array_size << 1;
         /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
          * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
          * So a two level scheme suffices for now. */
-        sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) *
-                                    num_meta_group_infos, GFP_KERNEL);
+        sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
         if (sbi->s_group_info == NULL) {
                 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
                 return -ENOMEM;
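ext4_mb_add_groupinfo() above addresses s_group_info as a two-level table: the upper bits of the group number select a block of pointers, the lower bits select a slot inside it, and ext4_mb_init_backend() now sizes the first level for the reserved GDT blocks as well, rounded up to a power of two. A short userspace sketch of that arithmetic follows; EXT4_DESC_PER_BLOCK is assumed to be 128 here (4k blocks with 32-byte descriptors) and the reserved-GDT count is a sample value, not taken from any real superblock.

#include <stdio.h>
#include <stddef.h>

#define DESC_PER_BLOCK_BITS     7                       /* assumed: 4k blocks */
#define DESC_PER_BLOCK          (1 << DESC_PER_BLOCK_BITS)

int main(void)
{
        unsigned long group = 1000;

        /* First index: which block of pointers; second: slot inside it. */
        unsigned long meta = group >> DESC_PER_BLOCK_BITS;
        unsigned long slot = group & (DESC_PER_BLOCK - 1);

        /* First level sized for GDT blocks plus reserved GDT blocks,
         * rounded up to a power of two (mirrors the array_size loop). */
        unsigned long num_meta_group_infos_max = 64 + 32;       /* sample values */
        size_t array_size = 1;

        while (array_size < sizeof(void *) * num_meta_group_infos_max)
                array_size <<= 1;

        printf("group %lu -> s_group_info[%lu][%lu], first level %zu bytes\n",
               group, meta, slot, array_size);
        return 0;
}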
@@ -2256,63 +2448,15 @@ static int ext4_mb_init_backend(struct super_block *sb)
                 sbi->s_group_info[i] = meta_group_info;
         }
 
-        /*
-         * calculate needed size. if change bb_counters size,
-         * don't forget about ext4_mb_generate_buddy()
-         */
-        len = sizeof(struct ext4_group_info);
-        len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2);
         for (i = 0; i < sbi->s_groups_count; i++) {
-                struct ext4_group_desc *desc;
-
-                meta_group_info =
-                        sbi->s_group_info[i >> EXT4_DESC_PER_BLOCK_BITS(sb)];
-                j = i & (EXT4_DESC_PER_BLOCK(sb) - 1);
-
-                meta_group_info[j] = kzalloc(len, GFP_KERNEL);
-                if (meta_group_info[j] == NULL) {
-                        printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
-                        goto err_freebuddy;
-                }
                 desc = ext4_get_group_desc(sb, i, NULL);
                 if (desc == NULL) {
                         printk(KERN_ERR
                                 "EXT4-fs: can't read descriptor %lu\n", i);
-                        i++;
                         goto err_freebuddy;
                 }
-                memset(meta_group_info[j], 0, len);
-                set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
-                        &(meta_group_info[j]->bb_state));
-
-                /*
-                 * initialize bb_free to be able to skip
-                 * empty groups without initialization
-                 */
-                if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-                        meta_group_info[j]->bb_free =
-                                ext4_free_blocks_after_init(sb, i, desc);
-                } else {
-                        meta_group_info[j]->bb_free =
-                                le16_to_cpu(desc->bg_free_blocks_count);
-                }
-
-                INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list);
-
-#ifdef DOUBLE_CHECK
-                {
-                        struct buffer_head *bh;
-                        meta_group_info[j]->bb_bitmap =
-                                kmalloc(sb->s_blocksize, GFP_KERNEL);
-                        BUG_ON(meta_group_info[j]->bb_bitmap == NULL);
-                        bh = read_block_bitmap(sb, i);
-                        BUG_ON(bh == NULL);
-                        memcpy(meta_group_info[j]->bb_bitmap, bh->b_data,
-                                sb->s_blocksize);
-                        put_bh(bh);
-                }
-#endif
-
+                if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
+                        goto err_freebuddy;
         }
 
         return 0;
@@ -2336,6 +2480,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
         unsigned i;
         unsigned offset;
         unsigned max;
+        int ret;
 
         if (!test_opt(sb, MBALLOC))
                 return 0;
@@ -2370,12 +2515,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
         } while (i <= sb->s_blocksize_bits + 1);
 
         /* init file for buddy data */
-        i = ext4_mb_init_backend(sb);
-        if (i) {
+        ret = ext4_mb_init_backend(sb);
+        if (ret != 0) {
                 clear_opt(sbi->s_mount_opt, MBALLOC);
                 kfree(sbi->s_mb_offsets);
                 kfree(sbi->s_mb_maxs);
-                return i;
+                return ret;
         }
 
         spin_lock_init(&sbi->s_md_lock);
@@ -2548,8 +2693,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
                 ext4_lock_group(sb, md->group);
                 for (i = 0; i < md->num; i++) {
                         mb_debug(" %u", md->blocks[i]);
-                        err = mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
-                        BUG_ON(err != 0);
+                        mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
                 }
                 mb_debug("\n");
                 ext4_unlock_group(sb, md->group);
@@ -2575,25 +2719,24 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
 
 
 
-#define MB_PROC_VALUE_READ(name)                                \
-static int ext4_mb_read_##name(char *page, char **start,       \
-                off_t off, int count, int *eof, void *data)     \
+#define MB_PROC_FOPS(name)                                      \
+static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \
 {                                                               \
-        struct ext4_sb_info *sbi = data;                        \
-        int len;                                                \
-        *eof = 1;                                               \
-        if (off != 0)                                           \
-                return 0;                                       \
-        len = sprintf(page, "%ld\n", sbi->s_mb_##name);         \
-        *start = page;                                          \
-        return len;                                             \
-}
-
-#define MB_PROC_VALUE_WRITE(name)                               \
-static int ext4_mb_write_##name(struct file *file,              \
-                const char __user *buf, unsigned long cnt, void *data) \
+        struct ext4_sb_info *sbi = m->private;                  \
+                                                                \
+        seq_printf(m, "%ld\n", sbi->s_mb_##name);               \
+        return 0;                                               \
+}                                                               \
+                                                                \
+static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
+{                                                               \
+        return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
+}                                                               \
+                                                                \
+static ssize_t ext4_mb_##name##_proc_write(struct file *file,   \
+                const char __user *buf, size_t cnt, loff_t *ppos) \
 {                                                               \
-        struct ext4_sb_info *sbi = data;                        \
+        struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
         char str[32];                                           \
         long value;                                             \
         if (cnt >= sizeof(str))                                 \
@@ -2605,31 +2748,32 @@ static int ext4_mb_write_##name(struct file *file, \
                 return -ERANGE;                                 \
         sbi->s_mb_##name = value;                               \
         return cnt;                                             \
-}
+}                                                               \
+                                                                \
+static const struct file_operations ext4_mb_##name##_proc_fops = { \
+        .owner          = THIS_MODULE,                          \
+        .open           = ext4_mb_##name##_proc_open,           \
+        .read           = seq_read,                             \
+        .llseek         = seq_lseek,                            \
+        .release        = single_release,                       \
+        .write          = ext4_mb_##name##_proc_write,          \
+};
 
-MB_PROC_VALUE_READ(stats);
-MB_PROC_VALUE_WRITE(stats);
-MB_PROC_VALUE_READ(max_to_scan);
-MB_PROC_VALUE_WRITE(max_to_scan);
-MB_PROC_VALUE_READ(min_to_scan);
-MB_PROC_VALUE_WRITE(min_to_scan);
-MB_PROC_VALUE_READ(order2_reqs);
-MB_PROC_VALUE_WRITE(order2_reqs);
-MB_PROC_VALUE_READ(stream_request);
-MB_PROC_VALUE_WRITE(stream_request);
-MB_PROC_VALUE_READ(group_prealloc);
-MB_PROC_VALUE_WRITE(group_prealloc);
+MB_PROC_FOPS(stats);
+MB_PROC_FOPS(max_to_scan);
+MB_PROC_FOPS(min_to_scan);
+MB_PROC_FOPS(order2_reqs);
+MB_PROC_FOPS(stream_request);
+MB_PROC_FOPS(group_prealloc);
 
 #define MB_PROC_HANDLER(name, var)                              \
 do {                                                            \
-        proc = create_proc_entry(name, mode, sbi->s_mb_proc);   \
+        proc = proc_create_data(name, mode, sbi->s_mb_proc,     \
+                                &ext4_mb_##var##_proc_fops, sbi); \
         if (proc == NULL) {                                     \
                 printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \
                 goto err_out;                                   \
         }                                                       \
-        proc->data = sbi;                                       \
-        proc->read_proc = ext4_mb_read_##var ;                  \
-        proc->write_proc = ext4_mb_write_##var;                 \
 } while (0)
 
 static int ext4_mb_init_per_dev_proc(struct super_block *sb)
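The MB_PROC_FOPS() macro above generates, for each tunable, a seq_file show routine, a single_open() based open routine, a write routine and a file_operations table that MB_PROC_HANDLER() then registers with proc_create_data(). Below is a sketch of what one expansion amounts to, written out for a single hypothetical tunable demo_value under /proc/demo, against the same 2.6.26-era proc and seq_file API the patch uses (PDE() and proc file_operations were reworked in later kernels, so this is illustrative only; the range check on the written value is omitted here).

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>

static long demo_value = 42;
static struct proc_dir_entry *demo_dir;

static int demo_value_proc_show(struct seq_file *m, void *v)
{
        long *val = m->private;         /* handed over by proc_create_data() */

        seq_printf(m, "%ld\n", *val);
        return 0;
}

static int demo_value_proc_open(struct inode *inode, struct file *file)
{
        return single_open(file, demo_value_proc_show, PDE(inode)->data);
}

static ssize_t demo_value_proc_write(struct file *file, const char __user *buf,
                                     size_t cnt, loff_t *ppos)
{
        long *val = PDE(file->f_path.dentry->d_inode)->data;
        char str[32];

        if (cnt >= sizeof(str))
                return -EINVAL;
        if (copy_from_user(str, buf, cnt))
                return -EFAULT;
        str[cnt] = '\0';
        *val = simple_strtol(str, NULL, 0);
        return cnt;
}

static const struct file_operations demo_value_proc_fops = {
        .owner          = THIS_MODULE,
        .open           = demo_value_proc_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
        .write          = demo_value_proc_write,
};

static int __init demo_init(void)
{
        demo_dir = proc_mkdir("demo", NULL);
        if (demo_dir == NULL)
                return -ENOMEM;
        if (proc_create_data("demo_value", 0644, demo_dir,
                             &demo_value_proc_fops, &demo_value) == NULL) {
                remove_proc_entry("demo", NULL);
                return -ENOMEM;
        }
        return 0;
}

static void __exit demo_exit(void)
{
        remove_proc_entry("demo_value", demo_dir);
        remove_proc_entry("demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");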
@@ -2639,6 +2783,10 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
         struct proc_dir_entry *proc;
         char devname[64];
 
+        if (proc_root_ext4 == NULL) {
+                sbi->s_mb_proc = NULL;
+                return -EINVAL;
+        }
         bdevname(sb->s_bdev, devname);
         sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
 
@@ -2747,7 +2895,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 
 
         err = -EIO;
-        bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group);
+        bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
         if (!bitmap_bh)
                 goto out_err;
 
@@ -2816,7 +2964,23 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
         le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
         gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
         spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-        percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
+
+        /*
+         * free blocks account has already be reduced/reserved
+         * at write_begin() time for delayed allocation
+         * do not double accounting
+         */
+        if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
+                percpu_counter_sub(&sbi->s_freeblocks_counter,
+                                   ac->ac_b_ex.fe_len);
+
+        if (sbi->s_log_groups_per_flex) {
+                ext4_group_t flex_group = ext4_flex_group(sbi,
+                                                          ac->ac_b_ex.fe_group);
+                spin_lock(sb_bgl_lock(sbi, flex_group));
+                sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len;
+                spin_unlock(sb_bgl_lock(sbi, flex_group));
+        }
 
         err = ext4_journal_dirty_metadata(handle, bitmap_bh);
         if (err)
@@ -3473,8 +3637,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                 if (bit >= end)
                         break;
                 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
-                if (next > end)
-                        next = end;
                 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
                                 le32_to_cpu(sbi->s_es->s_first_data_block);
                 mb_debug(" free preallocated %u/%u in group %u\n",
@@ -3569,7 +3731,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
         if (list_empty(&grp->bb_prealloc_list))
                 return 0;
 
-        bitmap_bh = read_block_bitmap(sb, group);
+        bitmap_bh = ext4_read_block_bitmap(sb, group);
         if (bitmap_bh == NULL) {
                 /* error handling here */
                 ext4_mb_release_desc(&e4b);
@@ -3743,7 +3905,7 @@ repeat:
                 err = ext4_mb_load_buddy(sb, group, &e4b);
                 BUG_ON(err != 0); /* error handling here */
 
-                bitmap_bh = read_block_bitmap(sb, group);
+                bitmap_bh = ext4_read_block_bitmap(sb, group);
                 if (bitmap_bh == NULL) {
                         /* error handling here */
                         ext4_mb_release_desc(&e4b);
@@ -4011,10 +4173,21 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
         sbi = EXT4_SB(sb);
 
         if (!test_opt(sb, MBALLOC)) {
-                block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
+                block = ext4_old_new_blocks(handle, ar->inode, ar->goal,
                                             &(ar->len), errp);
                 return block;
         }
+        if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
+                /*
+                 * With delalloc we already reserved the blocks
+                 */
+                ar->len = ext4_has_free_blocks(sbi, ar->len);
+        }
+
+        if (ar->len == 0) {
+                *errp = -ENOSPC;
+                return 0;
+        }
 
         while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
                 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
@@ -4026,10 +4199,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
         }
         inquota = ar->len;
 
+        if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+                ar->flags |= EXT4_MB_DELALLOC_RESERVED;
+
         ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
         if (!ac) {
+                ar->len = 0;
                 *errp = -ENOMEM;
-                return 0;
+                goto out1;
         }
 
         ext4_mb_poll_new_transaction(sb, handle);
@@ -4037,12 +4214,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
         *errp = ext4_mb_initialize_context(ac, ar);
         if (*errp) {
                 ar->len = 0;
-                goto out;
+                goto out2;
         }
 
         ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
         if (!ext4_mb_use_preallocated(ac)) {
-
                 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
                 ext4_mb_normalize_request(ac, ar);
 repeat:
@@ -4085,11 +4261,12 @@ repeat:
 
         ext4_mb_release_context(ac);
 
-out:
+out2:
+        kmem_cache_free(ext4_ac_cachep, ac);
+out1:
         if (ar->len < inquota)
                 DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
 
-        kmem_cache_free(ext4_ac_cachep, ac);
         return block;
 }
 static void ext4_mb_poll_new_transaction(struct super_block *sb,
@@ -4242,7 +4419,7 @@ do_more:
                 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
                 count -= overflow;
         }
-        bitmap_bh = read_block_bitmap(sb, block_group);
+        bitmap_bh = ext4_read_block_bitmap(sb, block_group);
         if (!bitmap_bh)
                 goto error_return;
         gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
@@ -4309,10 +4486,9 @@ do_more:
                 ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
         } else {
                 ext4_lock_group(sb, block_group);
-                err = mb_free_blocks(inode, &e4b, bit, count);
+                mb_free_blocks(inode, &e4b, bit, count);
                 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
                 ext4_unlock_group(sb, block_group);
-                BUG_ON(err != 0);
         }
 
         spin_lock(sb_bgl_lock(sbi, block_group));
@@ -4321,6 +4497,13 @@ do_more:
         spin_unlock(sb_bgl_lock(sbi, block_group));
         percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
+        if (sbi->s_log_groups_per_flex) {
+                ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
+                spin_lock(sb_bgl_lock(sbi, flex_group));
+                sbi->s_flex_groups[flex_group].free_blocks += count;
+                spin_unlock(sb_bgl_lock(sbi, flex_group));
+        }
+
         ext4_mb_release_desc(&e4b);
 
         *freed += count;