aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2010-10-27 23:44:47 -0400
committerTheodore Ts'o <tytso@mit.edu>2010-10-27 23:44:47 -0400
commita107e5a3a473a2ea62bd5af24e11b84adf1486ff (patch)
treed36c2cb38d8be88d4d75cdebc354aa140aa0e470 /fs/ext4/mballoc.c
parente3e1288e86a07cdeb0aee5860a2dff111c6eff79 (diff)
parenta269029d0e2192046be4c07ed78a45022469ee4c (diff)
Merge branch 'next' into upstream-merge
Conflicts: fs/ext4/inode.c fs/ext4/mballoc.c include/trace/events/ext4.h
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c554
1 files changed, 368 insertions, 186 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 42f77b1dc72d..c58eba34724a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -338,6 +338,14 @@
338static struct kmem_cache *ext4_pspace_cachep; 338static struct kmem_cache *ext4_pspace_cachep;
339static struct kmem_cache *ext4_ac_cachep; 339static struct kmem_cache *ext4_ac_cachep;
340static struct kmem_cache *ext4_free_ext_cachep; 340static struct kmem_cache *ext4_free_ext_cachep;
341
342/* We create slab caches for groupinfo data structures based on the
343 * superblock block size. There is one cache for each unique
344 * s_blocksize_bits, shared by all mounted filesystems of that block size */
345#define NR_GRPINFO_CACHES \
346 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
347static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
348
341static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 349static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
342 ext4_group_t group); 350 ext4_group_t group);
343static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 351static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -939,6 +947,85 @@ out:
939} 947}
940 948
941/* 949/*
950 * Lock the group_info alloc_sem of all the groups
951 * belonging to the same buddy cache page. This
952 * makes sure no other parallel operation on the buddy
953 * cache happens while holding the buddy cache
954 * lock.
955 */
956static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
957 ext4_group_t group)
958{
959 int i;
960 int block, pnum;
961 int blocks_per_page;
962 int groups_per_page;
963 ext4_group_t ngroups = ext4_get_groups_count(sb);
964 ext4_group_t first_group;
965 struct ext4_group_info *grp;
966
967 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
968 /*
969 * the buddy cache inode stores the block bitmap
970 * and buddy information in consecutive blocks.
971 * So for each group we need two blocks.
972 */
973 block = group * 2;
974 pnum = block / blocks_per_page;
975 first_group = pnum * blocks_per_page / 2;
976
977 groups_per_page = blocks_per_page >> 1;
978 if (groups_per_page == 0)
979 groups_per_page = 1;
980 /* read all groups the page covers into the cache */
981 for (i = 0; i < groups_per_page; i++) {
982
983 if ((first_group + i) >= ngroups)
984 break;
985 grp = ext4_get_group_info(sb, first_group + i);
986 /* take all groups write allocation
987 * semaphore. This make sure there is
988 * no block allocation going on in any
989 * of that groups
990 */
991 down_write_nested(&grp->alloc_sem, i);
992 }
993 return i;
994}
995
996static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
997 ext4_group_t group, int locked_group)
998{
999 int i;
1000 int block, pnum;
1001 int blocks_per_page;
1002 ext4_group_t first_group;
1003 struct ext4_group_info *grp;
1004
1005 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1006 /*
1007 * the buddy cache inode stores the block bitmap
1008 * and buddy information in consecutive blocks.
1009 * So for each group we need two blocks.
1010 */
1011 block = group * 2;
1012 pnum = block / blocks_per_page;
1013 first_group = pnum * blocks_per_page / 2;
1014 /* release locks on all the groups */
1015 for (i = 0; i < locked_group; i++) {
1016
1017 grp = ext4_get_group_info(sb, first_group + i);
1018 /* take all groups write allocation
1019 * semaphore. This make sure there is
1020 * no block allocation going on in any
1021 * of that groups
1022 */
1023 up_write(&grp->alloc_sem);
1024 }
1025
1026}
1027
1028/*
942 * Locking note: This routine calls ext4_mb_init_cache(), which takes the 1029 * Locking note: This routine calls ext4_mb_init_cache(), which takes the
943 * block group lock of all groups for this page; do not hold the BG lock when 1030 * block group lock of all groups for this page; do not hold the BG lock when
944 * calling this routine! 1031 * calling this routine!
@@ -1915,84 +2002,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1915 return 0; 2002 return 0;
1916} 2003}
1917 2004
1918/*
1919 * lock the group_info alloc_sem of all the groups
1920 * belonging to the same buddy cache page. This
1921 * make sure other parallel operation on the buddy
1922 * cache doesn't happen whild holding the buddy cache
1923 * lock
1924 */
1925int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1926{
1927 int i;
1928 int block, pnum;
1929 int blocks_per_page;
1930 int groups_per_page;
1931 ext4_group_t ngroups = ext4_get_groups_count(sb);
1932 ext4_group_t first_group;
1933 struct ext4_group_info *grp;
1934
1935 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1936 /*
1937 * the buddy cache inode stores the block bitmap
1938 * and buddy information in consecutive blocks.
1939 * So for each group we need two blocks.
1940 */
1941 block = group * 2;
1942 pnum = block / blocks_per_page;
1943 first_group = pnum * blocks_per_page / 2;
1944
1945 groups_per_page = blocks_per_page >> 1;
1946 if (groups_per_page == 0)
1947 groups_per_page = 1;
1948 /* read all groups the page covers into the cache */
1949 for (i = 0; i < groups_per_page; i++) {
1950
1951 if ((first_group + i) >= ngroups)
1952 break;
1953 grp = ext4_get_group_info(sb, first_group + i);
1954 /* take all groups write allocation
1955 * semaphore. This make sure there is
1956 * no block allocation going on in any
1957 * of that groups
1958 */
1959 down_write_nested(&grp->alloc_sem, i);
1960 }
1961 return i;
1962}
1963
1964void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
1965 ext4_group_t group, int locked_group)
1966{
1967 int i;
1968 int block, pnum;
1969 int blocks_per_page;
1970 ext4_group_t first_group;
1971 struct ext4_group_info *grp;
1972
1973 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1974 /*
1975 * the buddy cache inode stores the block bitmap
1976 * and buddy information in consecutive blocks.
1977 * So for each group we need two blocks.
1978 */
1979 block = group * 2;
1980 pnum = block / blocks_per_page;
1981 first_group = pnum * blocks_per_page / 2;
1982 /* release locks on all the groups */
1983 for (i = 0; i < locked_group; i++) {
1984
1985 grp = ext4_get_group_info(sb, first_group + i);
1986 /* take all groups write allocation
1987 * semaphore. This make sure there is
1988 * no block allocation going on in any
1989 * of that groups
1990 */
1991 up_write(&grp->alloc_sem);
1992 }
1993
1994}
1995
1996static noinline_for_stack int 2005static noinline_for_stack int
1997ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 2006ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1998{ 2007{
@@ -2233,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = {
2233 .release = seq_release, 2242 .release = seq_release,
2234}; 2243};
2235 2244
2245static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2246{
2247 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2248 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2249
2250 BUG_ON(!cachep);
2251 return cachep;
2252}
2236 2253
2237/* Create and initialize ext4_group_info data for the given group. */ 2254/* Create and initialize ext4_group_info data for the given group. */
2238int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, 2255int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2239 struct ext4_group_desc *desc) 2256 struct ext4_group_desc *desc)
2240{ 2257{
2241 int i, len; 2258 int i;
2242 int metalen = 0; 2259 int metalen = 0;
2243 struct ext4_sb_info *sbi = EXT4_SB(sb); 2260 struct ext4_sb_info *sbi = EXT4_SB(sb);
2244 struct ext4_group_info **meta_group_info; 2261 struct ext4_group_info **meta_group_info;
2262 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2245 2263
2246 /* 2264 /*
2247 * First check if this group is the first of a reserved block. 2265 * First check if this group is the first of a reserved block.
@@ -2261,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2261 meta_group_info; 2279 meta_group_info;
2262 } 2280 }
2263 2281
2264 /*
2265 * calculate needed size. if change bb_counters size,
2266 * don't forget about ext4_mb_generate_buddy()
2267 */
2268 len = offsetof(typeof(**meta_group_info),
2269 bb_counters[sb->s_blocksize_bits + 2]);
2270
2271 meta_group_info = 2282 meta_group_info =
2272 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; 2283 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2273 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); 2284 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2274 2285
2275 meta_group_info[i] = kzalloc(len, GFP_KERNEL); 2286 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
2276 if (meta_group_info[i] == NULL) { 2287 if (meta_group_info[i] == NULL) {
2277 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); 2288 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2278 goto exit_group_info; 2289 goto exit_group_info;
2279 } 2290 }
2291 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
2280 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, 2292 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2281 &(meta_group_info[i]->bb_state)); 2293 &(meta_group_info[i]->bb_state));
2282 2294
@@ -2331,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2331 int num_meta_group_infos_max; 2343 int num_meta_group_infos_max;
2332 int array_size; 2344 int array_size;
2333 struct ext4_group_desc *desc; 2345 struct ext4_group_desc *desc;
2346 struct kmem_cache *cachep;
2334 2347
2335 /* This is the number of blocks used by GDT */ 2348 /* This is the number of blocks used by GDT */
2336 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 2349 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
@@ -2389,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb)
2389 return 0; 2402 return 0;
2390 2403
2391err_freebuddy: 2404err_freebuddy:
2405 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2392 while (i-- > 0) 2406 while (i-- > 0)
2393 kfree(ext4_get_group_info(sb, i)); 2407 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2394 i = num_meta_group_infos; 2408 i = num_meta_group_infos;
2395 while (i-- > 0) 2409 while (i-- > 0)
2396 kfree(sbi->s_group_info[i]); 2410 kfree(sbi->s_group_info[i]);
@@ -2407,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2407 unsigned offset; 2421 unsigned offset;
2408 unsigned max; 2422 unsigned max;
2409 int ret; 2423 int ret;
2424 int cache_index;
2425 struct kmem_cache *cachep;
2426 char *namep = NULL;
2410 2427
2411 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); 2428 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2412 2429
2413 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); 2430 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2414 if (sbi->s_mb_offsets == NULL) { 2431 if (sbi->s_mb_offsets == NULL) {
2415 return -ENOMEM; 2432 ret = -ENOMEM;
2433 goto out;
2416 } 2434 }
2417 2435
2418 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); 2436 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2419 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); 2437 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2420 if (sbi->s_mb_maxs == NULL) { 2438 if (sbi->s_mb_maxs == NULL) {
2421 kfree(sbi->s_mb_offsets); 2439 ret = -ENOMEM;
2422 return -ENOMEM; 2440 goto out;
2441 }
2442
2443 cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2444 cachep = ext4_groupinfo_caches[cache_index];
2445 if (!cachep) {
2446 char name[32];
2447 int len = offsetof(struct ext4_group_info,
2448 bb_counters[sb->s_blocksize_bits + 2]);
2449
2450 sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
2451 namep = kstrdup(name, GFP_KERNEL);
2452 if (!namep) {
2453 ret = -ENOMEM;
2454 goto out;
2455 }
2456
2457 /* Need to free the kmem_cache_name() when we
2458 * destroy the slab */
2459 cachep = kmem_cache_create(namep, len, 0,
2460 SLAB_RECLAIM_ACCOUNT, NULL);
2461 if (!cachep) {
2462 ret = -ENOMEM;
2463 goto out;
2464 }
2465 ext4_groupinfo_caches[cache_index] = cachep;
2423 } 2466 }
2424 2467
2425 /* order 0 is regular bitmap */ 2468 /* order 0 is regular bitmap */
@@ -2440,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2440 /* init file for buddy data */ 2483 /* init file for buddy data */
2441 ret = ext4_mb_init_backend(sb); 2484 ret = ext4_mb_init_backend(sb);
2442 if (ret != 0) { 2485 if (ret != 0) {
2443 kfree(sbi->s_mb_offsets); 2486 goto out;
2444 kfree(sbi->s_mb_maxs);
2445 return ret;
2446 } 2487 }
2447 2488
2448 spin_lock_init(&sbi->s_md_lock); 2489 spin_lock_init(&sbi->s_md_lock);
@@ -2457,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2457 2498
2458 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); 2499 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2459 if (sbi->s_locality_groups == NULL) { 2500 if (sbi->s_locality_groups == NULL) {
2460 kfree(sbi->s_mb_offsets); 2501 ret = -ENOMEM;
2461 kfree(sbi->s_mb_maxs); 2502 goto out;
2462 return -ENOMEM;
2463 } 2503 }
2464 for_each_possible_cpu(i) { 2504 for_each_possible_cpu(i) {
2465 struct ext4_locality_group *lg; 2505 struct ext4_locality_group *lg;
@@ -2476,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2476 2516
2477 if (sbi->s_journal) 2517 if (sbi->s_journal)
2478 sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2518 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2479 return 0; 2519out:
2520 if (ret) {
2521 kfree(sbi->s_mb_offsets);
2522 kfree(sbi->s_mb_maxs);
2523 kfree(namep);
2524 }
2525 return ret;
2480} 2526}
2481 2527
2482/* need to called with the ext4 group lock held */ 2528/* need to called with the ext4 group lock held */
@@ -2504,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb)
2504 int num_meta_group_infos; 2550 int num_meta_group_infos;
2505 struct ext4_group_info *grinfo; 2551 struct ext4_group_info *grinfo;
2506 struct ext4_sb_info *sbi = EXT4_SB(sb); 2552 struct ext4_sb_info *sbi = EXT4_SB(sb);
2553 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2507 2554
2508 if (sbi->s_group_info) { 2555 if (sbi->s_group_info) {
2509 for (i = 0; i < ngroups; i++) { 2556 for (i = 0; i < ngroups; i++) {
@@ -2514,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb)
2514 ext4_lock_group(sb, i); 2561 ext4_lock_group(sb, i);
2515 ext4_mb_cleanup_pa(grinfo); 2562 ext4_mb_cleanup_pa(grinfo);
2516 ext4_unlock_group(sb, i); 2563 ext4_unlock_group(sb, i);
2517 kfree(grinfo); 2564 kmem_cache_free(cachep, grinfo);
2518 } 2565 }
2519 num_meta_group_infos = (ngroups + 2566 num_meta_group_infos = (ngroups +
2520 EXT4_DESC_PER_BLOCK(sb) - 1) >> 2567 EXT4_DESC_PER_BLOCK(sb) - 1) >>
@@ -2558,7 +2605,7 @@ int ext4_mb_release(struct super_block *sb)
2558 return 0; 2605 return 0;
2559} 2606}
2560 2607
2561static inline void ext4_issue_discard(struct super_block *sb, 2608static inline int ext4_issue_discard(struct super_block *sb,
2562 ext4_group_t block_group, ext4_grpblk_t block, int count) 2609 ext4_group_t block_group, ext4_grpblk_t block, int count)
2563{ 2610{
2564 int ret; 2611 int ret;
@@ -2568,10 +2615,11 @@ static inline void ext4_issue_discard(struct super_block *sb,
2568 trace_ext4_discard_blocks(sb, 2615 trace_ext4_discard_blocks(sb,
2569 (unsigned long long) discard_block, count); 2616 (unsigned long long) discard_block, count);
2570 ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2617 ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2571 if (ret == EOPNOTSUPP) { 2618 if (ret == -EOPNOTSUPP) {
2572 ext4_warning(sb, "discard not supported, disabling"); 2619 ext4_warning(sb, "discard not supported, disabling");
2573 clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); 2620 clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
2574 } 2621 }
2622 return ret;
2575} 2623}
2576 2624
2577/* 2625/*
@@ -2659,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void)
2659 2707
2660#endif 2708#endif
2661 2709
2662int __init init_ext4_mballoc(void) 2710int __init ext4_init_mballoc(void)
2663{ 2711{
2664 ext4_pspace_cachep = 2712 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2665 kmem_cache_create("ext4_prealloc_space", 2713 SLAB_RECLAIM_ACCOUNT);
2666 sizeof(struct ext4_prealloc_space),
2667 0, SLAB_RECLAIM_ACCOUNT, NULL);
2668 if (ext4_pspace_cachep == NULL) 2714 if (ext4_pspace_cachep == NULL)
2669 return -ENOMEM; 2715 return -ENOMEM;
2670 2716
2671 ext4_ac_cachep = 2717 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2672 kmem_cache_create("ext4_alloc_context", 2718 SLAB_RECLAIM_ACCOUNT);
2673 sizeof(struct ext4_allocation_context),
2674 0, SLAB_RECLAIM_ACCOUNT, NULL);
2675 if (ext4_ac_cachep == NULL) { 2719 if (ext4_ac_cachep == NULL) {
2676 kmem_cache_destroy(ext4_pspace_cachep); 2720 kmem_cache_destroy(ext4_pspace_cachep);
2677 return -ENOMEM; 2721 return -ENOMEM;
2678 } 2722 }
2679 2723
2680 ext4_free_ext_cachep = 2724 ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
2681 kmem_cache_create("ext4_free_block_extents", 2725 SLAB_RECLAIM_ACCOUNT);
2682 sizeof(struct ext4_free_data),
2683 0, SLAB_RECLAIM_ACCOUNT, NULL);
2684 if (ext4_free_ext_cachep == NULL) { 2726 if (ext4_free_ext_cachep == NULL) {
2685 kmem_cache_destroy(ext4_pspace_cachep); 2727 kmem_cache_destroy(ext4_pspace_cachep);
2686 kmem_cache_destroy(ext4_ac_cachep); 2728 kmem_cache_destroy(ext4_ac_cachep);
@@ -2690,8 +2732,9 @@ int __init init_ext4_mballoc(void)
2690 return 0; 2732 return 0;
2691} 2733}
2692 2734
2693void exit_ext4_mballoc(void) 2735void ext4_exit_mballoc(void)
2694{ 2736{
2737 int i;
2695 /* 2738 /*
2696 * Wait for completion of call_rcu()'s on ext4_pspace_cachep 2739 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2697 * before destroying the slab cache. 2740 * before destroying the slab cache.
@@ -2700,6 +2743,15 @@ void exit_ext4_mballoc(void)
2700 kmem_cache_destroy(ext4_pspace_cachep); 2743 kmem_cache_destroy(ext4_pspace_cachep);
2701 kmem_cache_destroy(ext4_ac_cachep); 2744 kmem_cache_destroy(ext4_ac_cachep);
2702 kmem_cache_destroy(ext4_free_ext_cachep); 2745 kmem_cache_destroy(ext4_free_ext_cachep);
2746
2747 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2748 struct kmem_cache *cachep = ext4_groupinfo_caches[i];
2749 if (cachep) {
2750 char *name = (char *)kmem_cache_name(cachep);
2751 kmem_cache_destroy(cachep);
2752 kfree(name);
2753 }
2754 }
2703 ext4_remove_debugfs_entry(); 2755 ext4_remove_debugfs_entry();
2704} 2756}
2705 2757
@@ -3536,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3536 */ 3588 */
3537static noinline_for_stack int 3589static noinline_for_stack int
3538ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, 3590ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3539 struct ext4_prealloc_space *pa, 3591 struct ext4_prealloc_space *pa)
3540 struct ext4_allocation_context *ac)
3541{ 3592{
3542 struct super_block *sb = e4b->bd_sb; 3593 struct super_block *sb = e4b->bd_sb;
3543 struct ext4_sb_info *sbi = EXT4_SB(sb); 3594 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3555,11 +3606,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3555 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3606 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3556 end = bit + pa->pa_len; 3607 end = bit + pa->pa_len;
3557 3608
3558 if (ac) {
3559 ac->ac_sb = sb;
3560 ac->ac_inode = pa->pa_inode;
3561 }
3562
3563 while (bit < end) { 3609 while (bit < end) {
3564 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); 3610 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3565 if (bit >= end) 3611 if (bit >= end)
@@ -3570,16 +3616,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3570 (unsigned) next - bit, (unsigned) group); 3616 (unsigned) next - bit, (unsigned) group);
3571 free += next - bit; 3617 free += next - bit;
3572 3618
3573 if (ac) { 3619 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3574 ac->ac_b_ex.fe_group = group; 3620 trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
3575 ac->ac_b_ex.fe_start = bit; 3621 grp_blk_start + bit, next - bit);
3576 ac->ac_b_ex.fe_len = next - bit;
3577 ac->ac_b_ex.fe_logical = 0;
3578 trace_ext4_mballoc_discard(ac);
3579 }
3580
3581 trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
3582 next - bit);
3583 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3622 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3584 bit = next + 1; 3623 bit = next + 1;
3585 } 3624 }
@@ -3602,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3602 3641
3603static noinline_for_stack int 3642static noinline_for_stack int
3604ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3643ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3605 struct ext4_prealloc_space *pa, 3644 struct ext4_prealloc_space *pa)
3606 struct ext4_allocation_context *ac)
3607{ 3645{
3608 struct super_block *sb = e4b->bd_sb; 3646 struct super_block *sb = e4b->bd_sb;
3609 ext4_group_t group; 3647 ext4_group_t group;
3610 ext4_grpblk_t bit; 3648 ext4_grpblk_t bit;
3611 3649
3612 trace_ext4_mb_release_group_pa(sb, ac, pa); 3650 trace_ext4_mb_release_group_pa(sb, pa);
3613 BUG_ON(pa->pa_deleted == 0); 3651 BUG_ON(pa->pa_deleted == 0);
3614 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3652 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3615 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3653 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3616 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); 3654 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3617 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); 3655 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3618 3656 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3619 if (ac) {
3620 ac->ac_sb = sb;
3621 ac->ac_inode = NULL;
3622 ac->ac_b_ex.fe_group = group;
3623 ac->ac_b_ex.fe_start = bit;
3624 ac->ac_b_ex.fe_len = pa->pa_len;
3625 ac->ac_b_ex.fe_logical = 0;
3626 trace_ext4_mballoc_discard(ac);
3627 }
3628 3657
3629 return 0; 3658 return 0;
3630} 3659}
@@ -3645,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3645 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3674 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3646 struct buffer_head *bitmap_bh = NULL; 3675 struct buffer_head *bitmap_bh = NULL;
3647 struct ext4_prealloc_space *pa, *tmp; 3676 struct ext4_prealloc_space *pa, *tmp;
3648 struct ext4_allocation_context *ac;
3649 struct list_head list; 3677 struct list_head list;
3650 struct ext4_buddy e4b; 3678 struct ext4_buddy e4b;
3651 int err; 3679 int err;
@@ -3674,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3674 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; 3702 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
3675 3703
3676 INIT_LIST_HEAD(&list); 3704 INIT_LIST_HEAD(&list);
3677 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3678 if (ac)
3679 ac->ac_sb = sb;
3680repeat: 3705repeat:
3681 ext4_lock_group(sb, group); 3706 ext4_lock_group(sb, group);
3682 list_for_each_entry_safe(pa, tmp, 3707 list_for_each_entry_safe(pa, tmp,
@@ -3731,9 +3756,9 @@ repeat:
3731 spin_unlock(pa->pa_obj_lock); 3756 spin_unlock(pa->pa_obj_lock);
3732 3757
3733 if (pa->pa_type == MB_GROUP_PA) 3758 if (pa->pa_type == MB_GROUP_PA)
3734 ext4_mb_release_group_pa(&e4b, pa, ac); 3759 ext4_mb_release_group_pa(&e4b, pa);
3735 else 3760 else
3736 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); 3761 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3737 3762
3738 list_del(&pa->u.pa_tmp_list); 3763 list_del(&pa->u.pa_tmp_list);
3739 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3764 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3741,8 +3766,6 @@ repeat:
3741 3766
3742out: 3767out:
3743 ext4_unlock_group(sb, group); 3768 ext4_unlock_group(sb, group);
3744 if (ac)
3745 kmem_cache_free(ext4_ac_cachep, ac);
3746 ext4_mb_unload_buddy(&e4b); 3769 ext4_mb_unload_buddy(&e4b);
3747 put_bh(bitmap_bh); 3770 put_bh(bitmap_bh);
3748 return free; 3771 return free;
@@ -3763,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode)
3763 struct super_block *sb = inode->i_sb; 3786 struct super_block *sb = inode->i_sb;
3764 struct buffer_head *bitmap_bh = NULL; 3787 struct buffer_head *bitmap_bh = NULL;
3765 struct ext4_prealloc_space *pa, *tmp; 3788 struct ext4_prealloc_space *pa, *tmp;
3766 struct ext4_allocation_context *ac;
3767 ext4_group_t group = 0; 3789 ext4_group_t group = 0;
3768 struct list_head list; 3790 struct list_head list;
3769 struct ext4_buddy e4b; 3791 struct ext4_buddy e4b;
@@ -3779,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode)
3779 3801
3780 INIT_LIST_HEAD(&list); 3802 INIT_LIST_HEAD(&list);
3781 3803
3782 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3783 if (ac) {
3784 ac->ac_sb = sb;
3785 ac->ac_inode = inode;
3786 }
3787repeat: 3804repeat:
3788 /* first, collect all pa's in the inode */ 3805 /* first, collect all pa's in the inode */
3789 spin_lock(&ei->i_prealloc_lock); 3806 spin_lock(&ei->i_prealloc_lock);
@@ -3853,7 +3870,7 @@ repeat:
3853 3870
3854 ext4_lock_group(sb, group); 3871 ext4_lock_group(sb, group);
3855 list_del(&pa->pa_group_list); 3872 list_del(&pa->pa_group_list);
3856 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); 3873 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3857 ext4_unlock_group(sb, group); 3874 ext4_unlock_group(sb, group);
3858 3875
3859 ext4_mb_unload_buddy(&e4b); 3876 ext4_mb_unload_buddy(&e4b);
@@ -3862,8 +3879,6 @@ repeat:
3862 list_del(&pa->u.pa_tmp_list); 3879 list_del(&pa->u.pa_tmp_list);
3863 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3880 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3864 } 3881 }
3865 if (ac)
3866 kmem_cache_free(ext4_ac_cachep, ac);
3867} 3882}
3868 3883
3869/* 3884/*
@@ -4061,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4061 struct ext4_buddy e4b; 4076 struct ext4_buddy e4b;
4062 struct list_head discard_list; 4077 struct list_head discard_list;
4063 struct ext4_prealloc_space *pa, *tmp; 4078 struct ext4_prealloc_space *pa, *tmp;
4064 struct ext4_allocation_context *ac;
4065 4079
4066 mb_debug(1, "discard locality group preallocation\n"); 4080 mb_debug(1, "discard locality group preallocation\n");
4067 4081
4068 INIT_LIST_HEAD(&discard_list); 4082 INIT_LIST_HEAD(&discard_list);
4069 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4070 if (ac)
4071 ac->ac_sb = sb;
4072 4083
4073 spin_lock(&lg->lg_prealloc_lock); 4084 spin_lock(&lg->lg_prealloc_lock);
4074 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], 4085 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4120,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4120 } 4131 }
4121 ext4_lock_group(sb, group); 4132 ext4_lock_group(sb, group);
4122 list_del(&pa->pa_group_list); 4133 list_del(&pa->pa_group_list);
4123 ext4_mb_release_group_pa(&e4b, pa, ac); 4134 ext4_mb_release_group_pa(&e4b, pa);
4124 ext4_unlock_group(sb, group); 4135 ext4_unlock_group(sb, group);
4125 4136
4126 ext4_mb_unload_buddy(&e4b); 4137 ext4_mb_unload_buddy(&e4b);
4127 list_del(&pa->u.pa_tmp_list); 4138 list_del(&pa->u.pa_tmp_list);
4128 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 4139 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4129 } 4140 }
4130 if (ac)
4131 kmem_cache_free(ext4_ac_cachep, ac);
4132} 4141}
4133 4142
4134/* 4143/*
@@ -4492,7 +4501,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4492{ 4501{
4493 struct buffer_head *bitmap_bh = NULL; 4502 struct buffer_head *bitmap_bh = NULL;
4494 struct super_block *sb = inode->i_sb; 4503 struct super_block *sb = inode->i_sb;
4495 struct ext4_allocation_context *ac = NULL;
4496 struct ext4_group_desc *gdp; 4504 struct ext4_group_desc *gdp;
4497 unsigned long freed = 0; 4505 unsigned long freed = 0;
4498 unsigned int overflow; 4506 unsigned int overflow;
@@ -4532,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4532 if (!bh) 4540 if (!bh)
4533 tbh = sb_find_get_block(inode->i_sb, 4541 tbh = sb_find_get_block(inode->i_sb,
4534 block + i); 4542 block + i);
4543 if (unlikely(!tbh))
4544 continue;
4535 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 4545 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4536 inode, tbh, block + i); 4546 inode, tbh, block + i);
4537 } 4547 }
@@ -4547,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4547 if (!ext4_should_writeback_data(inode)) 4557 if (!ext4_should_writeback_data(inode))
4548 flags |= EXT4_FREE_BLOCKS_METADATA; 4558 flags |= EXT4_FREE_BLOCKS_METADATA;
4549 4559
4550 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4551 if (ac) {
4552 ac->ac_inode = inode;
4553 ac->ac_sb = sb;
4554 }
4555
4556do_more: 4560do_more:
4557 overflow = 0; 4561 overflow = 0;
4558 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 4562 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4610,12 +4614,7 @@ do_more:
4610 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); 4614 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4611 } 4615 }
4612#endif 4616#endif
4613 if (ac) { 4617 trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
4614 ac->ac_b_ex.fe_group = block_group;
4615 ac->ac_b_ex.fe_start = bit;
4616 ac->ac_b_ex.fe_len = count;
4617 trace_ext4_mballoc_free(ac);
4618 }
4619 4618
4620 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4619 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4621 if (err) 4620 if (err)
@@ -4641,12 +4640,12 @@ do_more:
4641 * with group lock held. generate_buddy look at 4640 * with group lock held. generate_buddy look at
4642 * them with group lock_held 4641 * them with group lock_held
4643 */ 4642 */
4643 if (test_opt(sb, DISCARD))
4644 ext4_issue_discard(sb, block_group, bit, count);
4644 ext4_lock_group(sb, block_group); 4645 ext4_lock_group(sb, block_group);
4645 mb_clear_bits(bitmap_bh->b_data, bit, count); 4646 mb_clear_bits(bitmap_bh->b_data, bit, count);
4646 mb_free_blocks(inode, &e4b, bit, count); 4647 mb_free_blocks(inode, &e4b, bit, count);
4647 ext4_mb_return_to_preallocation(inode, &e4b, block, count); 4648 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4648 if (test_opt(sb, DISCARD))
4649 ext4_issue_discard(sb, block_group, bit, count);
4650 } 4649 }
4651 4650
4652 ret = ext4_free_blks_count(sb, gdp) + count; 4651 ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4686,7 +4685,190 @@ error_return:
4686 dquot_free_block(inode, freed); 4685 dquot_free_block(inode, freed);
4687 brelse(bitmap_bh); 4686 brelse(bitmap_bh);
4688 ext4_std_error(sb, err); 4687 ext4_std_error(sb, err);
4689 if (ac)
4690 kmem_cache_free(ext4_ac_cachep, ac);
4691 return; 4688 return;
4692} 4689}
4690
4691/**
4692 * ext4_trim_extent -- function to TRIM one single free extent in the group
4693 * @sb: super block for the file system
4694 * @start: starting block of the free extent in the alloc. group
4695 * @count: number of blocks to TRIM
4696 * @group: alloc. group we are working with
4697 * @e4b: ext4 buddy for the group
4698 *
4699 * Trim "count" blocks starting at "start" in the "group". To assure that no
4700 * one will allocate those blocks, mark it as used in buddy bitmap. This must
4701 * be called with under the group lock.
4702 */
4703static int ext4_trim_extent(struct super_block *sb, int start, int count,
4704 ext4_group_t group, struct ext4_buddy *e4b)
4705{
4706 struct ext4_free_extent ex;
4707 int ret = 0;
4708
4709 assert_spin_locked(ext4_group_lock_ptr(sb, group));
4710
4711 ex.fe_start = start;
4712 ex.fe_group = group;
4713 ex.fe_len = count;
4714
4715 /*
4716 * Mark blocks used, so no one can reuse them while
4717 * being trimmed.
4718 */
4719 mb_mark_used(e4b, &ex);
4720 ext4_unlock_group(sb, group);
4721
4722 ret = ext4_issue_discard(sb, group, start, count);
4723 if (ret)
4724 ext4_std_error(sb, ret);
4725
4726 ext4_lock_group(sb, group);
4727 mb_free_blocks(NULL, e4b, start, ex.fe_len);
4728 return ret;
4729}
4730
4731/**
4732 * ext4_trim_all_free -- function to trim all free space in alloc. group
4733 * @sb: super block for file system
4734 * @e4b: ext4 buddy
4735 * @start: first group block to examine
4736 * @max: last group block to examine
4737 * @minblocks: minimum extent block count
4738 *
4739 * ext4_trim_all_free walks through group's buddy bitmap searching for free
4740 * extents. When the free block is found, ext4_trim_extent is called to TRIM
4741 * the extent.
4742 *
4743 *
4744 * ext4_trim_all_free walks through group's block bitmap searching for free
4745 * extents. When the free extent is found, mark it as used in group buddy
4746 * bitmap. Then issue a TRIM command on this extent and free the extent in
4747 * the group buddy bitmap. This is done until whole group is scanned.
4748 */
4749ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
4750 ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
4751{
4752 void *bitmap;
4753 ext4_grpblk_t next, count = 0;
4754 ext4_group_t group;
4755 int ret = 0;
4756
4757 BUG_ON(e4b == NULL);
4758
4759 bitmap = e4b->bd_bitmap;
4760 group = e4b->bd_group;
4761 start = (e4b->bd_info->bb_first_free > start) ?
4762 e4b->bd_info->bb_first_free : start;
4763 ext4_lock_group(sb, group);
4764
4765 while (start < max) {
4766 start = mb_find_next_zero_bit(bitmap, max, start);
4767 if (start >= max)
4768 break;
4769 next = mb_find_next_bit(bitmap, max, start);
4770
4771 if ((next - start) >= minblocks) {
4772 ret = ext4_trim_extent(sb, start,
4773 next - start, group, e4b);
4774 if (ret < 0)
4775 break;
4776 count += next - start;
4777 }
4778 start = next + 1;
4779
4780 if (fatal_signal_pending(current)) {
4781 count = -ERESTARTSYS;
4782 break;
4783 }
4784
4785 if (need_resched()) {
4786 ext4_unlock_group(sb, group);
4787 cond_resched();
4788 ext4_lock_group(sb, group);
4789 }
4790
4791 if ((e4b->bd_info->bb_free - count) < minblocks)
4792 break;
4793 }
4794 ext4_unlock_group(sb, group);
4795
4796 ext4_debug("trimmed %d blocks in the group %d\n",
4797 count, group);
4798
4799 if (ret < 0)
4800 count = ret;
4801
4802 return count;
4803}
4804
4805/**
4806 * ext4_trim_fs() -- trim ioctl handle function
4807 * @sb: superblock for filesystem
4808 * @range: fstrim_range structure
4809 *
4810 * start: First Byte to trim
4811 * len: number of Bytes to trim from start
4812 * minlen: minimum extent length in Bytes
4813 * ext4_trim_fs goes through all allocation groups containing Bytes from
4814 * start to start+len. For each such a group ext4_trim_all_free function
4815 * is invoked to trim all free space.
4816 */
4817int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4818{
4819 struct ext4_buddy e4b;
4820 ext4_group_t first_group, last_group;
4821 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4822 ext4_grpblk_t cnt = 0, first_block, last_block;
4823 uint64_t start, len, minlen, trimmed;
4824 int ret = 0;
4825
4826 start = range->start >> sb->s_blocksize_bits;
4827 len = range->len >> sb->s_blocksize_bits;
4828 minlen = range->minlen >> sb->s_blocksize_bits;
4829 trimmed = 0;
4830
4831 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
4832 return -EINVAL;
4833
4834 /* Determine first and last group to examine based on start and len */
4835 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
4836 &first_group, &first_block);
4837 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
4838 &last_group, &last_block);
4839 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
4840 last_block = EXT4_BLOCKS_PER_GROUP(sb);
4841
4842 if (first_group > last_group)
4843 return -EINVAL;
4844
4845 for (group = first_group; group <= last_group; group++) {
4846 ret = ext4_mb_load_buddy(sb, group, &e4b);
4847 if (ret) {
4848 ext4_error(sb, "Error in loading buddy "
4849 "information for %u", group);
4850 break;
4851 }
4852
4853 if (len >= EXT4_BLOCKS_PER_GROUP(sb))
4854 len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
4855 else
4856 last_block = len;
4857
4858 if (e4b.bd_info->bb_free >= minlen) {
4859 cnt = ext4_trim_all_free(sb, &e4b, first_block,
4860 last_block, minlen);
4861 if (cnt < 0) {
4862 ret = cnt;
4863 ext4_mb_unload_buddy(&e4b);
4864 break;
4865 }
4866 }
4867 ext4_mb_unload_buddy(&e4b);
4868 trimmed += cnt;
4869 first_block = 0;
4870 }
4871 range->len = trimmed * sb->s_blocksize;
4872
4873 return ret;
4874}