aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c437
1 files changed, 71 insertions, 366 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9d57695de746..fbec2ef93797 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,21 +21,7 @@
21 * mballoc.c contains the multiblocks allocation routines 21 * mballoc.c contains the multiblocks allocation routines
22 */ 22 */
23 23
24#include <linux/time.h> 24#include "mballoc.h"
25#include <linux/fs.h>
26#include <linux/namei.h>
27#include <linux/ext4_jbd2.h>
28#include <linux/ext4_fs.h>
29#include <linux/quotaops.h>
30#include <linux/buffer_head.h>
31#include <linux/module.h>
32#include <linux/swap.h>
33#include <linux/proc_fs.h>
34#include <linux/pagemap.h>
35#include <linux/seq_file.h>
36#include <linux/version.h>
37#include "group.h"
38
39/* 25/*
40 * MUSTDO: 26 * MUSTDO:
41 * - test ext4_ext_search_left() and ext4_ext_search_right() 27 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -345,288 +331,6 @@
345 * 331 *
346 */ 332 */
347 333
348/*
349 * with AGGRESSIVE_CHECK allocator runs consistency checks over
350 * structures. these checks slow things down a lot
351 */
352#define AGGRESSIVE_CHECK__
353
354/*
355 * with DOUBLE_CHECK defined mballoc creates persistent in-core
356 * bitmaps, maintains and uses them to check for double allocations
357 */
358#define DOUBLE_CHECK__
359
360/*
361 */
362#define MB_DEBUG__
363#ifdef MB_DEBUG
364#define mb_debug(fmt, a...) printk(fmt, ##a)
365#else
366#define mb_debug(fmt, a...)
367#endif
368
369/*
370 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
371 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
372 */
373#define EXT4_MB_HISTORY
374#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
375#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
376#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
377#define EXT4_MB_HISTORY_FREE 8 /* free */
378
379#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
380 EXT4_MB_HISTORY_PREALLOC)
381
382/*
383 * How long mballoc can look for a best extent (in found extents)
384 */
385#define MB_DEFAULT_MAX_TO_SCAN 200
386
387/*
388 * How long mballoc must look for a best extent
389 */
390#define MB_DEFAULT_MIN_TO_SCAN 10
391
392/*
393 * How many groups mballoc will scan looking for the best chunk
394 */
395#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
396
397/*
398 * with 'ext4_mb_stats' allocator will collect stats that will be
399 * shown at umount. The collecting costs though!
400 */
401#define MB_DEFAULT_STATS 1
402
403/*
404 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
405 * by the stream allocator, which purpose is to pack requests
406 * as close each to other as possible to produce smooth I/O traffic
407 * We use locality group prealloc space for stream request.
408 * We can tune the same via /proc/fs/ext4/<parition>/stream_req
409 */
410#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
411
412/*
413 * for which requests use 2^N search using buddies
414 */
415#define MB_DEFAULT_ORDER2_REQS 2
416
417/*
418 * default group prealloc size 512 blocks
419 */
420#define MB_DEFAULT_GROUP_PREALLOC 512
421
422static struct kmem_cache *ext4_pspace_cachep;
423static struct kmem_cache *ext4_ac_cachep;
424
425#ifdef EXT4_BB_MAX_BLOCKS
426#undef EXT4_BB_MAX_BLOCKS
427#endif
428#define EXT4_BB_MAX_BLOCKS 30
429
430struct ext4_free_metadata {
431 ext4_group_t group;
432 unsigned short num;
433 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
434 struct list_head list;
435};
436
437struct ext4_group_info {
438 unsigned long bb_state;
439 unsigned long bb_tid;
440 struct ext4_free_metadata *bb_md_cur;
441 unsigned short bb_first_free;
442 unsigned short bb_free;
443 unsigned short bb_fragments;
444 struct list_head bb_prealloc_list;
445#ifdef DOUBLE_CHECK
446 void *bb_bitmap;
447#endif
448 unsigned short bb_counters[];
449};
450
451#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
452#define EXT4_GROUP_INFO_LOCKED_BIT 1
453
454#define EXT4_MB_GRP_NEED_INIT(grp) \
455 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
456
457
458struct ext4_prealloc_space {
459 struct list_head pa_inode_list;
460 struct list_head pa_group_list;
461 union {
462 struct list_head pa_tmp_list;
463 struct rcu_head pa_rcu;
464 } u;
465 spinlock_t pa_lock;
466 atomic_t pa_count;
467 unsigned pa_deleted;
468 ext4_fsblk_t pa_pstart; /* phys. block */
469 ext4_lblk_t pa_lstart; /* log. block */
470 unsigned short pa_len; /* len of preallocated chunk */
471 unsigned short pa_free; /* how many blocks are free */
472 unsigned short pa_linear; /* consumed in one direction
473 * strictly, for grp prealloc */
474 spinlock_t *pa_obj_lock;
475 struct inode *pa_inode; /* hack, for history only */
476};
477
478
479struct ext4_free_extent {
480 ext4_lblk_t fe_logical;
481 ext4_grpblk_t fe_start;
482 ext4_group_t fe_group;
483 int fe_len;
484};
485
486/*
487 * Locality group:
488 * we try to group all related changes together
489 * so that writeback can flush/allocate them together as well
490 */
491struct ext4_locality_group {
492 /* for allocator */
493 struct mutex lg_mutex; /* to serialize allocates */
494 struct list_head lg_prealloc_list;/* list of preallocations */
495 spinlock_t lg_prealloc_lock;
496};
497
498struct ext4_allocation_context {
499 struct inode *ac_inode;
500 struct super_block *ac_sb;
501
502 /* original request */
503 struct ext4_free_extent ac_o_ex;
504
505 /* goal request (after normalization) */
506 struct ext4_free_extent ac_g_ex;
507
508 /* the best found extent */
509 struct ext4_free_extent ac_b_ex;
510
511 /* copy of the bext found extent taken before preallocation efforts */
512 struct ext4_free_extent ac_f_ex;
513
514 /* number of iterations done. we have to track to limit searching */
515 unsigned long ac_ex_scanned;
516 __u16 ac_groups_scanned;
517 __u16 ac_found;
518 __u16 ac_tail;
519 __u16 ac_buddy;
520 __u16 ac_flags; /* allocation hints */
521 __u8 ac_status;
522 __u8 ac_criteria;
523 __u8 ac_repeats;
524 __u8 ac_2order; /* if request is to allocate 2^N blocks and
525 * N > 0, the field stores N, otherwise 0 */
526 __u8 ac_op; /* operation, for history only */
527 struct page *ac_bitmap_page;
528 struct page *ac_buddy_page;
529 struct ext4_prealloc_space *ac_pa;
530 struct ext4_locality_group *ac_lg;
531};
532
533#define AC_STATUS_CONTINUE 1
534#define AC_STATUS_FOUND 2
535#define AC_STATUS_BREAK 3
536
537struct ext4_mb_history {
538 struct ext4_free_extent orig; /* orig allocation */
539 struct ext4_free_extent goal; /* goal allocation */
540 struct ext4_free_extent result; /* result allocation */
541 unsigned pid;
542 unsigned ino;
543 __u16 found; /* how many extents have been found */
544 __u16 groups; /* how many groups have been scanned */
545 __u16 tail; /* what tail broke some buddy */
546 __u16 buddy; /* buddy the tail ^^^ broke */
547 __u16 flags;
548 __u8 cr:3; /* which phase the result extent was found at */
549 __u8 op:4;
550 __u8 merged:1;
551};
552
553struct ext4_buddy {
554 struct page *bd_buddy_page;
555 void *bd_buddy;
556 struct page *bd_bitmap_page;
557 void *bd_bitmap;
558 struct ext4_group_info *bd_info;
559 struct super_block *bd_sb;
560 __u16 bd_blkbits;
561 ext4_group_t bd_group;
562};
563#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
564#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
565
566#ifndef EXT4_MB_HISTORY
567static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
568{
569 return;
570}
571#else
572static void ext4_mb_store_history(struct ext4_allocation_context *ac);
573#endif
574
575#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
576
577static struct proc_dir_entry *proc_root_ext4;
578struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
579ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
580 ext4_fsblk_t goal, unsigned long *count, int *errp);
581
582static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
583 ext4_group_t group);
584static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
585static void ext4_mb_free_committed_blocks(struct super_block *);
586static void ext4_mb_return_to_preallocation(struct inode *inode,
587 struct ext4_buddy *e4b, sector_t block,
588 int count);
589static void ext4_mb_put_pa(struct ext4_allocation_context *,
590 struct super_block *, struct ext4_prealloc_space *pa);
591static int ext4_mb_init_per_dev_proc(struct super_block *sb);
592static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
593
594
595static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
596{
597 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
598
599 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
600}
601
602static inline void ext4_unlock_group(struct super_block *sb,
603 ext4_group_t group)
604{
605 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
606
607 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
608}
609
610static inline int ext4_is_group_locked(struct super_block *sb,
611 ext4_group_t group)
612{
613 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
614
615 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
616 &(grinfo->bb_state));
617}
618
619static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
620 struct ext4_free_extent *fex)
621{
622 ext4_fsblk_t block;
623
624 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
625 + fex->fe_start
626 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
627 return block;
628}
629
630static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 334static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
631{ 335{
632#if BITS_PER_LONG == 64 336#if BITS_PER_LONG == 64
@@ -736,7 +440,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
736 blocknr += 440 blocknr +=
737 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 441 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
738 442
739 ext4_error(sb, __FUNCTION__, "double-free of inode" 443 ext4_error(sb, __func__, "double-free of inode"
740 " %lu's block %llu(bit %u in group %lu)\n", 444 " %lu's block %llu(bit %u in group %lu)\n",
741 inode ? inode->i_ino : 0, blocknr, 445 inode ? inode->i_ino : 0, blocknr,
742 first + i, e4b->bd_group); 446 first + i, e4b->bd_group);
@@ -898,17 +602,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
898 list_for_each(cur, &grp->bb_prealloc_list) { 602 list_for_each(cur, &grp->bb_prealloc_list) {
899 ext4_group_t groupnr; 603 ext4_group_t groupnr;
900 struct ext4_prealloc_space *pa; 604 struct ext4_prealloc_space *pa;
901 pa = list_entry(cur, struct ext4_prealloc_space, group_list); 605 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
902 ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k); 606 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
903 MB_CHECK_ASSERT(groupnr == e4b->bd_group); 607 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
904 for (i = 0; i < pa->len; i++) 608 for (i = 0; i < pa->pa_len; i++)
905 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); 609 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
906 } 610 }
907 return 0; 611 return 0;
908} 612}
909#undef MB_CHECK_ASSERT 613#undef MB_CHECK_ASSERT
910#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ 614#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
911 __FILE__, __FUNCTION__, __LINE__) 615 __FILE__, __func__, __LINE__)
912#else 616#else
913#define mb_check_buddy(e4b) 617#define mb_check_buddy(e4b)
914#endif 618#endif
@@ -982,7 +686,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
982 grp->bb_fragments = fragments; 686 grp->bb_fragments = fragments;
983 687
984 if (free != grp->bb_free) { 688 if (free != grp->bb_free) {
985 ext4_error(sb, __FUNCTION__, 689 ext4_error(sb, __func__,
986 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", 690 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
987 group, free, grp->bb_free); 691 group, free, grp->bb_free);
988 /* 692 /*
@@ -1168,8 +872,9 @@ out:
1168 return err; 872 return err;
1169} 873}
1170 874
1171static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, 875static noinline_for_stack int
1172 struct ext4_buddy *e4b) 876ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
877 struct ext4_buddy *e4b)
1173{ 878{
1174 struct ext4_sb_info *sbi = EXT4_SB(sb); 879 struct ext4_sb_info *sbi = EXT4_SB(sb);
1175 struct inode *inode = sbi->s_buddy_cache; 880 struct inode *inode = sbi->s_buddy_cache;
@@ -1367,7 +1072,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1367 blocknr += 1072 blocknr +=
1368 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 1073 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1369 1074
1370 ext4_error(sb, __FUNCTION__, "double-free of inode" 1075 ext4_error(sb, __func__, "double-free of inode"
1371 " %lu's block %llu(bit %u in group %lu)\n", 1076 " %lu's block %llu(bit %u in group %lu)\n",
1372 inode ? inode->i_ino : 0, blocknr, block, 1077 inode ? inode->i_ino : 0, blocknr, block,
1373 e4b->bd_group); 1078 e4b->bd_group);
@@ -1848,7 +1553,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1848 * free blocks even though group info says we 1553 * free blocks even though group info says we
1849 * we have free blocks 1554 * we have free blocks
1850 */ 1555 */
1851 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1556 ext4_error(sb, __func__, "%d free blocks as per "
1852 "group info. But bitmap says 0\n", 1557 "group info. But bitmap says 0\n",
1853 free); 1558 free);
1854 break; 1559 break;
@@ -1857,7 +1562,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1857 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1562 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1858 BUG_ON(ex.fe_len <= 0); 1563 BUG_ON(ex.fe_len <= 0);
1859 if (free < ex.fe_len) { 1564 if (free < ex.fe_len) {
1860 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1565 ext4_error(sb, __func__, "%d free blocks as per "
1861 "group info. But got %d blocks\n", 1566 "group info. But got %d blocks\n",
1862 free, ex.fe_len); 1567 free, ex.fe_len);
1863 /* 1568 /*
@@ -1965,7 +1670,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1965 return 0; 1670 return 0;
1966} 1671}
1967 1672
1968static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1673static noinline_for_stack int
1674ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1969{ 1675{
1970 ext4_group_t group; 1676 ext4_group_t group;
1971 ext4_group_t i; 1677 ext4_group_t i;
@@ -2465,7 +2171,8 @@ static void ext4_mb_history_init(struct super_block *sb)
2465 /* if we can't allocate history, then we simple won't use it */ 2171 /* if we can't allocate history, then we simple won't use it */
2466} 2172}
2467 2173
2468static void ext4_mb_store_history(struct ext4_allocation_context *ac) 2174static noinline_for_stack void
2175ext4_mb_store_history(struct ext4_allocation_context *ac)
2469{ 2176{
2470 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2177 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2471 struct ext4_mb_history h; 2178 struct ext4_mb_history h;
@@ -2565,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
2565 meta_group_info[j] = kzalloc(len, GFP_KERNEL); 2272 meta_group_info[j] = kzalloc(len, GFP_KERNEL);
2566 if (meta_group_info[j] == NULL) { 2273 if (meta_group_info[j] == NULL) {
2567 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); 2274 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2568 i--;
2569 goto err_freebuddy; 2275 goto err_freebuddy;
2570 } 2276 }
2571 desc = ext4_get_group_desc(sb, i, NULL); 2277 desc = ext4_get_group_desc(sb, i, NULL);
2572 if (desc == NULL) { 2278 if (desc == NULL) {
2573 printk(KERN_ERR 2279 printk(KERN_ERR
2574 "EXT4-fs: can't read descriptor %lu\n", i); 2280 "EXT4-fs: can't read descriptor %lu\n", i);
2281 i++;
2575 goto err_freebuddy; 2282 goto err_freebuddy;
2576 } 2283 }
2577 memset(meta_group_info[j], 0, len); 2284 memset(meta_group_info[j], 0, len);
@@ -2611,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
2611 return 0; 2318 return 0;
2612 2319
2613err_freebuddy: 2320err_freebuddy:
2614 while (i >= 0) { 2321 while (i-- > 0)
2615 kfree(ext4_get_group_info(sb, i)); 2322 kfree(ext4_get_group_info(sb, i));
2616 i--;
2617 }
2618 i = num_meta_group_infos; 2323 i = num_meta_group_infos;
2619err_freemeta: 2324err_freemeta:
2620 while (--i >= 0) 2325 while (i-- > 0)
2621 kfree(sbi->s_group_info[i]); 2326 kfree(sbi->s_group_info[i]);
2622 iput(sbi->s_buddy_cache); 2327 iput(sbi->s_buddy_cache);
2623err_freesgi: 2328err_freesgi:
@@ -2801,7 +2506,8 @@ int ext4_mb_release(struct super_block *sb)
2801 return 0; 2506 return 0;
2802} 2507}
2803 2508
2804static void ext4_mb_free_committed_blocks(struct super_block *sb) 2509static noinline_for_stack void
2510ext4_mb_free_committed_blocks(struct super_block *sb)
2805{ 2511{
2806 struct ext4_sb_info *sbi = EXT4_SB(sb); 2512 struct ext4_sb_info *sbi = EXT4_SB(sb);
2807 int err; 2513 int err;
@@ -3021,7 +2727,8 @@ void exit_ext4_mballoc(void)
3021 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps 2727 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
3022 * Returns 0 if success or error code 2728 * Returns 0 if success or error code
3023 */ 2729 */
3024static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2730static noinline_for_stack int
2731ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3025 handle_t *handle) 2732 handle_t *handle)
3026{ 2733{
3027 struct buffer_head *bitmap_bh = NULL; 2734 struct buffer_head *bitmap_bh = NULL;
@@ -3070,7 +2777,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3070 in_range(block, ext4_inode_table(sb, gdp), 2777 in_range(block, ext4_inode_table(sb, gdp),
3071 EXT4_SB(sb)->s_itb_per_group)) { 2778 EXT4_SB(sb)->s_itb_per_group)) {
3072 2779
3073 ext4_error(sb, __FUNCTION__, 2780 ext4_error(sb, __func__,
3074 "Allocating block in system zone - block = %llu", 2781 "Allocating block in system zone - block = %llu",
3075 block); 2782 block);
3076 } 2783 }
@@ -3094,9 +2801,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3094 ac->ac_b_ex.fe_group, 2801 ac->ac_b_ex.fe_group,
3095 gdp)); 2802 gdp));
3096 } 2803 }
3097 gdp->bg_free_blocks_count = 2804 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
3098 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
3099 - ac->ac_b_ex.fe_len);
3100 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2805 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
3101 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2806 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
3102 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 2807 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3130,7 +2835,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3130 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; 2835 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
3131 else 2836 else
3132 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; 2837 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3133 mb_debug("#%u: goal %lu blocks for locality group\n", 2838 mb_debug("#%u: goal %u blocks for locality group\n",
3134 current->pid, ac->ac_g_ex.fe_len); 2839 current->pid, ac->ac_g_ex.fe_len);
3135} 2840}
3136 2841
@@ -3138,15 +2843,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3138 * Normalization means making request better in terms of 2843 * Normalization means making request better in terms of
3139 * size and alignment 2844 * size and alignment
3140 */ 2845 */
3141static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, 2846static noinline_for_stack void
2847ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3142 struct ext4_allocation_request *ar) 2848 struct ext4_allocation_request *ar)
3143{ 2849{
3144 int bsbits, max; 2850 int bsbits, max;
3145 ext4_lblk_t end; 2851 ext4_lblk_t end;
3146 struct list_head *cur;
3147 loff_t size, orig_size, start_off; 2852 loff_t size, orig_size, start_off;
3148 ext4_lblk_t start, orig_start; 2853 ext4_lblk_t start, orig_start;
3149 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2854 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2855 struct ext4_prealloc_space *pa;
3150 2856
3151 /* do normalize only data requests, metadata requests 2857 /* do normalize only data requests, metadata requests
3152 do not need preallocation */ 2858 do not need preallocation */
@@ -3232,12 +2938,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3232 2938
3233 /* check we don't cross already preallocated blocks */ 2939 /* check we don't cross already preallocated blocks */
3234 rcu_read_lock(); 2940 rcu_read_lock();
3235 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2941 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3236 struct ext4_prealloc_space *pa;
3237 unsigned long pa_end; 2942 unsigned long pa_end;
3238 2943
3239 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3240
3241 if (pa->pa_deleted) 2944 if (pa->pa_deleted)
3242 continue; 2945 continue;
3243 spin_lock(&pa->pa_lock); 2946 spin_lock(&pa->pa_lock);
@@ -3279,10 +2982,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3279 2982
3280 /* XXX: extra loop to check we really don't overlap preallocations */ 2983 /* XXX: extra loop to check we really don't overlap preallocations */
3281 rcu_read_lock(); 2984 rcu_read_lock();
3282 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2985 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3283 struct ext4_prealloc_space *pa;
3284 unsigned long pa_end; 2986 unsigned long pa_end;
3285 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3286 spin_lock(&pa->pa_lock); 2987 spin_lock(&pa->pa_lock);
3287 if (pa->pa_deleted == 0) { 2988 if (pa->pa_deleted == 0) {
3288 pa_end = pa->pa_lstart + pa->pa_len; 2989 pa_end = pa->pa_lstart + pa->pa_len;
@@ -3374,7 +3075,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3374 BUG_ON(pa->pa_free < len); 3075 BUG_ON(pa->pa_free < len);
3375 pa->pa_free -= len; 3076 pa->pa_free -= len;
3376 3077
3377 mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa); 3078 mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
3378} 3079}
3379 3080
3380/* 3081/*
@@ -3404,12 +3105,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3404/* 3105/*
3405 * search goal blocks in preallocated space 3106 * search goal blocks in preallocated space
3406 */ 3107 */
3407static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3108static noinline_for_stack int
3109ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3408{ 3110{
3409 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3111 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3410 struct ext4_locality_group *lg; 3112 struct ext4_locality_group *lg;
3411 struct ext4_prealloc_space *pa; 3113 struct ext4_prealloc_space *pa;
3412 struct list_head *cur;
3413 3114
3414 /* only data can be preallocated */ 3115 /* only data can be preallocated */
3415 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3116 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3417,8 +3118,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3417 3118
3418 /* first, try per-file preallocation */ 3119 /* first, try per-file preallocation */
3419 rcu_read_lock(); 3120 rcu_read_lock();
3420 list_for_each_rcu(cur, &ei->i_prealloc_list) { 3121 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3421 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3422 3122
3423 /* all fields in this condition don't change, 3123 /* all fields in this condition don't change,
3424 * so we can skip locking for them */ 3124 * so we can skip locking for them */
@@ -3450,8 +3150,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3450 return 0; 3150 return 0;
3451 3151
3452 rcu_read_lock(); 3152 rcu_read_lock();
3453 list_for_each_rcu(cur, &lg->lg_prealloc_list) { 3153 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
3454 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3455 spin_lock(&pa->pa_lock); 3154 spin_lock(&pa->pa_lock);
3456 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { 3155 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
3457 atomic_inc(&pa->pa_count); 3156 atomic_inc(&pa->pa_count);
@@ -3571,7 +3270,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3571/* 3270/*
3572 * creates new preallocated space for given inode 3271 * creates new preallocated space for given inode
3573 */ 3272 */
3574static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) 3273static noinline_for_stack int
3274ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3575{ 3275{
3576 struct super_block *sb = ac->ac_sb; 3276 struct super_block *sb = ac->ac_sb;
3577 struct ext4_prealloc_space *pa; 3277 struct ext4_prealloc_space *pa;
@@ -3658,7 +3358,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3658/* 3358/*
3659 * creates new preallocated space for locality group inodes belongs to 3359 * creates new preallocated space for locality group inodes belongs to
3660 */ 3360 */
3661static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac) 3361static noinline_for_stack int
3362ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3662{ 3363{
3663 struct super_block *sb = ac->ac_sb; 3364 struct super_block *sb = ac->ac_sb;
3664 struct ext4_locality_group *lg; 3365 struct ext4_locality_group *lg;
@@ -3731,11 +3432,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3731 * the caller MUST hold group/inode locks. 3432 * the caller MUST hold group/inode locks.
3732 * TODO: optimize the case when there are no in-core structures yet 3433 * TODO: optimize the case when there are no in-core structures yet
3733 */ 3434 */
3734static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, 3435static noinline_for_stack int
3735 struct buffer_head *bitmap_bh, 3436ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3736 struct ext4_prealloc_space *pa) 3437 struct ext4_prealloc_space *pa,
3438 struct ext4_allocation_context *ac)
3737{ 3439{
3738 struct ext4_allocation_context *ac;
3739 struct super_block *sb = e4b->bd_sb; 3440 struct super_block *sb = e4b->bd_sb;
3740 struct ext4_sb_info *sbi = EXT4_SB(sb); 3441 struct ext4_sb_info *sbi = EXT4_SB(sb);
3741 unsigned long end; 3442 unsigned long end;
@@ -3751,8 +3452,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3751 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3452 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3752 end = bit + pa->pa_len; 3453 end = bit + pa->pa_len;
3753 3454
3754 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3755
3756 if (ac) { 3455 if (ac) {
3757 ac->ac_sb = sb; 3456 ac->ac_sb = sb;
3758 ac->ac_inode = pa->pa_inode; 3457 ac->ac_inode = pa->pa_inode;
@@ -3789,7 +3488,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3789 pa, (unsigned long) pa->pa_lstart, 3488 pa, (unsigned long) pa->pa_lstart,
3790 (unsigned long) pa->pa_pstart, 3489 (unsigned long) pa->pa_pstart,
3791 (unsigned long) pa->pa_len); 3490 (unsigned long) pa->pa_len);
3792 ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", 3491 ext4_error(sb, __func__, "free %u, pa_free %u\n",
3793 free, pa->pa_free); 3492 free, pa->pa_free);
3794 /* 3493 /*
3795 * pa is already deleted so we use the value obtained 3494 * pa is already deleted so we use the value obtained
@@ -3797,22 +3496,19 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3797 */ 3496 */
3798 } 3497 }
3799 atomic_add(free, &sbi->s_mb_discarded); 3498 atomic_add(free, &sbi->s_mb_discarded);
3800 if (ac)
3801 kmem_cache_free(ext4_ac_cachep, ac);
3802 3499
3803 return err; 3500 return err;
3804} 3501}
3805 3502
3806static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3503static noinline_for_stack int
3807 struct ext4_prealloc_space *pa) 3504ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3505 struct ext4_prealloc_space *pa,
3506 struct ext4_allocation_context *ac)
3808{ 3507{
3809 struct ext4_allocation_context *ac;
3810 struct super_block *sb = e4b->bd_sb; 3508 struct super_block *sb = e4b->bd_sb;
3811 ext4_group_t group; 3509 ext4_group_t group;
3812 ext4_grpblk_t bit; 3510 ext4_grpblk_t bit;
3813 3511
3814 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3815
3816 if (ac) 3512 if (ac)
3817 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3513 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3818 3514
@@ -3830,7 +3526,6 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3830 ac->ac_b_ex.fe_len = pa->pa_len; 3526 ac->ac_b_ex.fe_len = pa->pa_len;
3831 ac->ac_b_ex.fe_logical = 0; 3527 ac->ac_b_ex.fe_logical = 0;
3832 ext4_mb_store_history(ac); 3528 ext4_mb_store_history(ac);
3833 kmem_cache_free(ext4_ac_cachep, ac);
3834 } 3529 }
3835 3530
3836 return 0; 3531 return 0;
@@ -3845,12 +3540,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3845 * - how many do we discard 3540 * - how many do we discard
3846 * 1) how many requested 3541 * 1) how many requested
3847 */ 3542 */
3848static int ext4_mb_discard_group_preallocations(struct super_block *sb, 3543static noinline_for_stack int
3544ext4_mb_discard_group_preallocations(struct super_block *sb,
3849 ext4_group_t group, int needed) 3545 ext4_group_t group, int needed)
3850{ 3546{
3851 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3547 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3852 struct buffer_head *bitmap_bh = NULL; 3548 struct buffer_head *bitmap_bh = NULL;
3853 struct ext4_prealloc_space *pa, *tmp; 3549 struct ext4_prealloc_space *pa, *tmp;
3550 struct ext4_allocation_context *ac;
3854 struct list_head list; 3551 struct list_head list;
3855 struct ext4_buddy e4b; 3552 struct ext4_buddy e4b;
3856 int err; 3553 int err;
@@ -3878,6 +3575,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
3878 grp = ext4_get_group_info(sb, group); 3575 grp = ext4_get_group_info(sb, group);
3879 INIT_LIST_HEAD(&list); 3576 INIT_LIST_HEAD(&list);
3880 3577
3578 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3881repeat: 3579repeat:
3882 ext4_lock_group(sb, group); 3580 ext4_lock_group(sb, group);
3883 list_for_each_entry_safe(pa, tmp, 3581 list_for_each_entry_safe(pa, tmp,
@@ -3932,9 +3630,9 @@ repeat:
3932 spin_unlock(pa->pa_obj_lock); 3630 spin_unlock(pa->pa_obj_lock);
3933 3631
3934 if (pa->pa_linear) 3632 if (pa->pa_linear)
3935 ext4_mb_release_group_pa(&e4b, pa); 3633 ext4_mb_release_group_pa(&e4b, pa, ac);
3936 else 3634 else
3937 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3635 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
3938 3636
3939 list_del(&pa->u.pa_tmp_list); 3637 list_del(&pa->u.pa_tmp_list);
3940 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3638 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3942,6 +3640,8 @@ repeat:
3942 3640
3943out: 3641out:
3944 ext4_unlock_group(sb, group); 3642 ext4_unlock_group(sb, group);
3643 if (ac)
3644 kmem_cache_free(ext4_ac_cachep, ac);
3945 ext4_mb_release_desc(&e4b); 3645 ext4_mb_release_desc(&e4b);
3946 put_bh(bitmap_bh); 3646 put_bh(bitmap_bh);
3947 return free; 3647 return free;
@@ -3962,6 +3662,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3962 struct super_block *sb = inode->i_sb; 3662 struct super_block *sb = inode->i_sb;
3963 struct buffer_head *bitmap_bh = NULL; 3663 struct buffer_head *bitmap_bh = NULL;
3964 struct ext4_prealloc_space *pa, *tmp; 3664 struct ext4_prealloc_space *pa, *tmp;
3665 struct ext4_allocation_context *ac;
3965 ext4_group_t group = 0; 3666 ext4_group_t group = 0;
3966 struct list_head list; 3667 struct list_head list;
3967 struct ext4_buddy e4b; 3668 struct ext4_buddy e4b;
@@ -3976,6 +3677,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3976 3677
3977 INIT_LIST_HEAD(&list); 3678 INIT_LIST_HEAD(&list);
3978 3679
3680 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3979repeat: 3681repeat:
3980 /* first, collect all pa's in the inode */ 3682 /* first, collect all pa's in the inode */
3981 spin_lock(&ei->i_prealloc_lock); 3683 spin_lock(&ei->i_prealloc_lock);
@@ -4040,7 +3742,7 @@ repeat:
4040 3742
4041 ext4_lock_group(sb, group); 3743 ext4_lock_group(sb, group);
4042 list_del(&pa->pa_group_list); 3744 list_del(&pa->pa_group_list);
4043 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3745 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
4044 ext4_unlock_group(sb, group); 3746 ext4_unlock_group(sb, group);
4045 3747
4046 ext4_mb_release_desc(&e4b); 3748 ext4_mb_release_desc(&e4b);
@@ -4049,6 +3751,8 @@ repeat:
4049 list_del(&pa->u.pa_tmp_list); 3751 list_del(&pa->u.pa_tmp_list);
4050 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3752 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4051 } 3753 }
3754 if (ac)
3755 kmem_cache_free(ext4_ac_cachep, ac);
4052} 3756}
4053 3757
4054/* 3758/*
@@ -4108,7 +3812,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4108 printk(KERN_ERR "PA:%lu:%d:%u \n", i, 3812 printk(KERN_ERR "PA:%lu:%d:%u \n", i,
4109 start, pa->pa_len); 3813 start, pa->pa_len);
4110 } 3814 }
4111 ext4_lock_group(sb, i); 3815 ext4_unlock_group(sb, i);
4112 3816
4113 if (grp->bb_free == 0) 3817 if (grp->bb_free == 0)
4114 continue; 3818 continue;
@@ -4167,7 +3871,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4167 mutex_lock(&ac->ac_lg->lg_mutex); 3871 mutex_lock(&ac->ac_lg->lg_mutex);
4168} 3872}
4169 3873
4170static int ext4_mb_initialize_context(struct ext4_allocation_context *ac, 3874static noinline_for_stack int
3875ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4171 struct ext4_allocation_request *ar) 3876 struct ext4_allocation_request *ar)
4172{ 3877{
4173 struct super_block *sb = ar->inode->i_sb; 3878 struct super_block *sb = ar->inode->i_sb;
@@ -4398,7 +4103,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
4398 ext4_mb_free_committed_blocks(sb); 4103 ext4_mb_free_committed_blocks(sb);
4399} 4104}
4400 4105
4401static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, 4106static noinline_for_stack int
4107ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4402 ext4_group_t group, ext4_grpblk_t block, int count) 4108 ext4_group_t group, ext4_grpblk_t block, int count)
4403{ 4109{
4404 struct ext4_group_info *db = e4b->bd_info; 4110 struct ext4_group_info *db = e4b->bd_info;
@@ -4489,7 +4195,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4489 if (block < le32_to_cpu(es->s_first_data_block) || 4195 if (block < le32_to_cpu(es->s_first_data_block) ||
4490 block + count < block || 4196 block + count < block ||
4491 block + count > ext4_blocks_count(es)) { 4197 block + count > ext4_blocks_count(es)) {
4492 ext4_error(sb, __FUNCTION__, 4198 ext4_error(sb, __func__,
4493 "Freeing blocks not in datazone - " 4199 "Freeing blocks not in datazone - "
4494 "block = %lu, count = %lu", block, count); 4200 "block = %lu, count = %lu", block, count);
4495 goto error_return; 4201 goto error_return;
@@ -4530,7 +4236,7 @@ do_more:
4530 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4236 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4531 EXT4_SB(sb)->s_itb_per_group)) { 4237 EXT4_SB(sb)->s_itb_per_group)) {
4532 4238
4533 ext4_error(sb, __FUNCTION__, 4239 ext4_error(sb, __func__,
4534 "Freeing blocks in system zone - " 4240 "Freeing blocks in system zone - "
4535 "Block = %lu, count = %lu", block, count); 4241 "Block = %lu, count = %lu", block, count);
4536 } 4242 }
@@ -4588,8 +4294,7 @@ do_more:
4588 } 4294 }
4589 4295
4590 spin_lock(sb_bgl_lock(sbi, block_group)); 4296 spin_lock(sb_bgl_lock(sbi, block_group));
4591 gdp->bg_free_blocks_count = 4297 le16_add_cpu(&gdp->bg_free_blocks_count, count);
4592 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
4593 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4298 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4594 spin_unlock(sb_bgl_lock(sbi, block_group)); 4299 spin_unlock(sb_bgl_lock(sbi, block_group));
4595 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4300 percpu_counter_add(&sbi->s_freeblocks_counter, count);