diff options
author | Theodore Ts'o <tytso@mit.edu> | 2010-10-27 23:44:47 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2010-10-27 23:44:47 -0400 |
commit | a107e5a3a473a2ea62bd5af24e11b84adf1486ff (patch) | |
tree | d36c2cb38d8be88d4d75cdebc354aa140aa0e470 /fs/ext4/mballoc.c | |
parent | e3e1288e86a07cdeb0aee5860a2dff111c6eff79 (diff) | |
parent | a269029d0e2192046be4c07ed78a45022469ee4c (diff) |
Merge branch 'next' into upstream-merge
Conflicts:
fs/ext4/inode.c
fs/ext4/mballoc.c
include/trace/events/ext4.h
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 554 |
1 files changed, 368 insertions, 186 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 42f77b1dc72d..c58eba34724a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -338,6 +338,14 @@ | |||
338 | static struct kmem_cache *ext4_pspace_cachep; | 338 | static struct kmem_cache *ext4_pspace_cachep; |
339 | static struct kmem_cache *ext4_ac_cachep; | 339 | static struct kmem_cache *ext4_ac_cachep; |
340 | static struct kmem_cache *ext4_free_ext_cachep; | 340 | static struct kmem_cache *ext4_free_ext_cachep; |
341 | |||
342 | /* We create slab caches for groupinfo data structures based on the | ||
343 | * superblock block size. There will be one cache for each unique | ||
344 | * s_blocksize_bits, shared by every mounted filesystem that uses it */ | ||
345 | #define NR_GRPINFO_CACHES \ | ||
346 | (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1) | ||
347 | static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; | ||
348 | |||
341 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 349 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
342 | ext4_group_t group); | 350 | ext4_group_t group); |
343 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 351 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
@@ -939,6 +947,85 @@ out: | |||
939 | } | 947 | } |
940 | 948 | ||
941 | /* | 949 | /* |
950 | * lock the group_info alloc_sem of all the groups | ||
951 | * belonging to the same buddy cache page. This | ||
952 | * makes sure other parallel operations on the buddy | ||
953 | * cache don't happen while holding the buddy cache | ||
954 | * lock | ||
955 | */ | ||
956 | static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, | ||
957 | ext4_group_t group) | ||
958 | { | ||
959 | int i; | ||
960 | int block, pnum; | ||
961 | int blocks_per_page; | ||
962 | int groups_per_page; | ||
963 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
964 | ext4_group_t first_group; | ||
965 | struct ext4_group_info *grp; | ||
966 | |||
967 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
968 | /* | ||
969 | * the buddy cache inode stores the block bitmap | ||
970 | * and buddy information in consecutive blocks. | ||
971 | * So for each group we need two blocks. | ||
972 | */ | ||
973 | block = group * 2; | ||
974 | pnum = block / blocks_per_page; | ||
975 | first_group = pnum * blocks_per_page / 2; | ||
976 | |||
977 | groups_per_page = blocks_per_page >> 1; | ||
978 | if (groups_per_page == 0) | ||
979 | groups_per_page = 1; | ||
980 | /* lock every group the buddy cache page covers */ | ||
981 | for (i = 0; i < groups_per_page; i++) { | ||
982 | |||
983 | if ((first_group + i) >= ngroups) | ||
984 | break; | ||
985 | grp = ext4_get_group_info(sb, first_group + i); | ||
986 | /* take every group's write allocation | ||
987 | * semaphore. This makes sure there is | ||
988 | * no block allocation going on in any | ||
989 | * of those groups | ||
990 | */ | ||
991 | down_write_nested(&grp->alloc_sem, i); | ||
992 | } | ||
993 | return i; | ||
994 | } | ||
995 | |||
996 | static void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
997 | ext4_group_t group, int locked_group) | ||
998 | { | ||
999 | int i; | ||
1000 | int block, pnum; | ||
1001 | int blocks_per_page; | ||
1002 | ext4_group_t first_group; | ||
1003 | struct ext4_group_info *grp; | ||
1004 | |||
1005 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1006 | /* | ||
1007 | * the buddy cache inode stores the block bitmap | ||
1008 | * and buddy information in consecutive blocks. | ||
1009 | * So for each group we need two blocks. | ||
1010 | */ | ||
1011 | block = group * 2; | ||
1012 | pnum = block / blocks_per_page; | ||
1013 | first_group = pnum * blocks_per_page / 2; | ||
1014 | /* release locks on all the groups */ | ||
1015 | for (i = 0; i < locked_group; i++) { | ||
1016 | |||
1017 | grp = ext4_get_group_info(sb, first_group + i); | ||
1018 | /* release each group's write allocation | ||
1019 | * semaphore, undoing the down_write taken | ||
1020 | * for the first locked_group groups of | ||
1021 | * this buddy cache page | ||
1022 | */ | ||
1023 | up_write(&grp->alloc_sem); | ||
1024 | } | ||
1025 | |||
1026 | } | ||
1027 | |||
1028 | /* | ||
942 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | 1029 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the |
943 | * block group lock of all groups for this page; do not hold the BG lock when | 1030 | * block group lock of all groups for this page; do not hold the BG lock when |
944 | * calling this routine! | 1031 | * calling this routine! |
@@ -1915,84 +2002,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1915 | return 0; | 2002 | return 0; |
1916 | } | 2003 | } |
1917 | 2004 | ||
1918 | /* | ||
1919 | * lock the group_info alloc_sem of all the groups | ||
1920 | * belonging to the same buddy cache page. This | ||
1921 | * make sure other parallel operation on the buddy | ||
1922 | * cache doesn't happen whild holding the buddy cache | ||
1923 | * lock | ||
1924 | */ | ||
1925 | int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | ||
1926 | { | ||
1927 | int i; | ||
1928 | int block, pnum; | ||
1929 | int blocks_per_page; | ||
1930 | int groups_per_page; | ||
1931 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
1932 | ext4_group_t first_group; | ||
1933 | struct ext4_group_info *grp; | ||
1934 | |||
1935 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1936 | /* | ||
1937 | * the buddy cache inode stores the block bitmap | ||
1938 | * and buddy information in consecutive blocks. | ||
1939 | * So for each group we need two blocks. | ||
1940 | */ | ||
1941 | block = group * 2; | ||
1942 | pnum = block / blocks_per_page; | ||
1943 | first_group = pnum * blocks_per_page / 2; | ||
1944 | |||
1945 | groups_per_page = blocks_per_page >> 1; | ||
1946 | if (groups_per_page == 0) | ||
1947 | groups_per_page = 1; | ||
1948 | /* read all groups the page covers into the cache */ | ||
1949 | for (i = 0; i < groups_per_page; i++) { | ||
1950 | |||
1951 | if ((first_group + i) >= ngroups) | ||
1952 | break; | ||
1953 | grp = ext4_get_group_info(sb, first_group + i); | ||
1954 | /* take all groups write allocation | ||
1955 | * semaphore. This make sure there is | ||
1956 | * no block allocation going on in any | ||
1957 | * of that groups | ||
1958 | */ | ||
1959 | down_write_nested(&grp->alloc_sem, i); | ||
1960 | } | ||
1961 | return i; | ||
1962 | } | ||
1963 | |||
1964 | void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
1965 | ext4_group_t group, int locked_group) | ||
1966 | { | ||
1967 | int i; | ||
1968 | int block, pnum; | ||
1969 | int blocks_per_page; | ||
1970 | ext4_group_t first_group; | ||
1971 | struct ext4_group_info *grp; | ||
1972 | |||
1973 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1974 | /* | ||
1975 | * the buddy cache inode stores the block bitmap | ||
1976 | * and buddy information in consecutive blocks. | ||
1977 | * So for each group we need two blocks. | ||
1978 | */ | ||
1979 | block = group * 2; | ||
1980 | pnum = block / blocks_per_page; | ||
1981 | first_group = pnum * blocks_per_page / 2; | ||
1982 | /* release locks on all the groups */ | ||
1983 | for (i = 0; i < locked_group; i++) { | ||
1984 | |||
1985 | grp = ext4_get_group_info(sb, first_group + i); | ||
1986 | /* take all groups write allocation | ||
1987 | * semaphore. This make sure there is | ||
1988 | * no block allocation going on in any | ||
1989 | * of that groups | ||
1990 | */ | ||
1991 | up_write(&grp->alloc_sem); | ||
1992 | } | ||
1993 | |||
1994 | } | ||
1995 | |||
1996 | static noinline_for_stack int | 2005 | static noinline_for_stack int |
1997 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 2006 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1998 | { | 2007 | { |
@@ -2233,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = { | |||
2233 | .release = seq_release, | 2242 | .release = seq_release, |
2234 | }; | 2243 | }; |
2235 | 2244 | ||
2245 | static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) | ||
2246 | { | ||
2247 | int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2248 | struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index]; | ||
2249 | |||
2250 | BUG_ON(!cachep); | ||
2251 | return cachep; | ||
2252 | } | ||
2236 | 2253 | ||
2237 | /* Create and initialize ext4_group_info data for the given group. */ | 2254 | /* Create and initialize ext4_group_info data for the given group. */ |
2238 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | 2255 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, |
2239 | struct ext4_group_desc *desc) | 2256 | struct ext4_group_desc *desc) |
2240 | { | 2257 | { |
2241 | int i, len; | 2258 | int i; |
2242 | int metalen = 0; | 2259 | int metalen = 0; |
2243 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2260 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2244 | struct ext4_group_info **meta_group_info; | 2261 | struct ext4_group_info **meta_group_info; |
2262 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2245 | 2263 | ||
2246 | /* | 2264 | /* |
2247 | * First check if this group is the first of a reserved block. | 2265 | * First check if this group is the first of a reserved block. |
@@ -2261,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2261 | meta_group_info; | 2279 | meta_group_info; |
2262 | } | 2280 | } |
2263 | 2281 | ||
2264 | /* | ||
2265 | * calculate needed size. if change bb_counters size, | ||
2266 | * don't forget about ext4_mb_generate_buddy() | ||
2267 | */ | ||
2268 | len = offsetof(typeof(**meta_group_info), | ||
2269 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2270 | |||
2271 | meta_group_info = | 2282 | meta_group_info = |
2272 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | 2283 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; |
2273 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 2284 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
2274 | 2285 | ||
2275 | meta_group_info[i] = kzalloc(len, GFP_KERNEL); | 2286 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); |
2276 | if (meta_group_info[i] == NULL) { | 2287 | if (meta_group_info[i] == NULL) { |
2277 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | 2288 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); |
2278 | goto exit_group_info; | 2289 | goto exit_group_info; |
2279 | } | 2290 | } |
2291 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); | ||
2280 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | 2292 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, |
2281 | &(meta_group_info[i]->bb_state)); | 2293 | &(meta_group_info[i]->bb_state)); |
2282 | 2294 | ||
@@ -2331,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2331 | int num_meta_group_infos_max; | 2343 | int num_meta_group_infos_max; |
2332 | int array_size; | 2344 | int array_size; |
2333 | struct ext4_group_desc *desc; | 2345 | struct ext4_group_desc *desc; |
2346 | struct kmem_cache *cachep; | ||
2334 | 2347 | ||
2335 | /* This is the number of blocks used by GDT */ | 2348 | /* This is the number of blocks used by GDT */ |
2336 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - | 2349 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - |
@@ -2389,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2389 | return 0; | 2402 | return 0; |
2390 | 2403 | ||
2391 | err_freebuddy: | 2404 | err_freebuddy: |
2405 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2392 | while (i-- > 0) | 2406 | while (i-- > 0) |
2393 | kfree(ext4_get_group_info(sb, i)); | 2407 | kmem_cache_free(cachep, ext4_get_group_info(sb, i)); |
2394 | i = num_meta_group_infos; | 2408 | i = num_meta_group_infos; |
2395 | while (i-- > 0) | 2409 | while (i-- > 0) |
2396 | kfree(sbi->s_group_info[i]); | 2410 | kfree(sbi->s_group_info[i]); |
@@ -2407,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2407 | unsigned offset; | 2421 | unsigned offset; |
2408 | unsigned max; | 2422 | unsigned max; |
2409 | int ret; | 2423 | int ret; |
2424 | int cache_index; | ||
2425 | struct kmem_cache *cachep; | ||
2426 | char *namep = NULL; | ||
2410 | 2427 | ||
2411 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); | 2428 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); |
2412 | 2429 | ||
2413 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2430 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2414 | if (sbi->s_mb_offsets == NULL) { | 2431 | if (sbi->s_mb_offsets == NULL) { |
2415 | return -ENOMEM; | 2432 | ret = -ENOMEM; |
2433 | goto out; | ||
2416 | } | 2434 | } |
2417 | 2435 | ||
2418 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); | 2436 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); |
2419 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2437 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2420 | if (sbi->s_mb_maxs == NULL) { | 2438 | if (sbi->s_mb_maxs == NULL) { |
2421 | kfree(sbi->s_mb_offsets); | 2439 | ret = -ENOMEM; |
2422 | return -ENOMEM; | 2440 | goto out; |
2441 | } | ||
2442 | |||
2443 | cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2444 | cachep = ext4_groupinfo_caches[cache_index]; | ||
2445 | if (!cachep) { | ||
2446 | char name[32]; | ||
2447 | int len = offsetof(struct ext4_group_info, | ||
2448 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2449 | |||
2450 | sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits); | ||
2451 | namep = kstrdup(name, GFP_KERNEL); | ||
2452 | if (!namep) { | ||
2453 | ret = -ENOMEM; | ||
2454 | goto out; | ||
2455 | } | ||
2456 | |||
2457 | /* Need to free the kmem_cache_name() when we | ||
2458 | * destroy the slab */ | ||
2459 | cachep = kmem_cache_create(namep, len, 0, | ||
2460 | SLAB_RECLAIM_ACCOUNT, NULL); | ||
2461 | if (!cachep) { | ||
2462 | ret = -ENOMEM; | ||
2463 | goto out; | ||
2464 | } | ||
2465 | ext4_groupinfo_caches[cache_index] = cachep; | ||
2423 | } | 2466 | } |
2424 | 2467 | ||
2425 | /* order 0 is regular bitmap */ | 2468 | /* order 0 is regular bitmap */ |
@@ -2440,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2440 | /* init file for buddy data */ | 2483 | /* init file for buddy data */ |
2441 | ret = ext4_mb_init_backend(sb); | 2484 | ret = ext4_mb_init_backend(sb); |
2442 | if (ret != 0) { | 2485 | if (ret != 0) { |
2443 | kfree(sbi->s_mb_offsets); | 2486 | goto out; |
2444 | kfree(sbi->s_mb_maxs); | ||
2445 | return ret; | ||
2446 | } | 2487 | } |
2447 | 2488 | ||
2448 | spin_lock_init(&sbi->s_md_lock); | 2489 | spin_lock_init(&sbi->s_md_lock); |
@@ -2457,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2457 | 2498 | ||
2458 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); | 2499 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2459 | if (sbi->s_locality_groups == NULL) { | 2500 | if (sbi->s_locality_groups == NULL) { |
2460 | kfree(sbi->s_mb_offsets); | 2501 | ret = -ENOMEM; |
2461 | kfree(sbi->s_mb_maxs); | 2502 | goto out; |
2462 | return -ENOMEM; | ||
2463 | } | 2503 | } |
2464 | for_each_possible_cpu(i) { | 2504 | for_each_possible_cpu(i) { |
2465 | struct ext4_locality_group *lg; | 2505 | struct ext4_locality_group *lg; |
@@ -2476,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2476 | 2516 | ||
2477 | if (sbi->s_journal) | 2517 | if (sbi->s_journal) |
2478 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | 2518 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
2479 | return 0; | 2519 | out: |
2520 | if (ret) { | ||
2521 | kfree(sbi->s_mb_offsets); | ||
2522 | kfree(sbi->s_mb_maxs); | ||
2523 | kfree(namep); | ||
2524 | } | ||
2525 | return ret; | ||
2480 | } | 2526 | } |
2481 | 2527 | ||
2482 | /* needs to be called with the ext4 group lock held */ | 2528 | /* needs to be called with the ext4 group lock held */ |
@@ -2504,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2504 | int num_meta_group_infos; | 2550 | int num_meta_group_infos; |
2505 | struct ext4_group_info *grinfo; | 2551 | struct ext4_group_info *grinfo; |
2506 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2552 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2553 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2507 | 2554 | ||
2508 | if (sbi->s_group_info) { | 2555 | if (sbi->s_group_info) { |
2509 | for (i = 0; i < ngroups; i++) { | 2556 | for (i = 0; i < ngroups; i++) { |
@@ -2514,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2514 | ext4_lock_group(sb, i); | 2561 | ext4_lock_group(sb, i); |
2515 | ext4_mb_cleanup_pa(grinfo); | 2562 | ext4_mb_cleanup_pa(grinfo); |
2516 | ext4_unlock_group(sb, i); | 2563 | ext4_unlock_group(sb, i); |
2517 | kfree(grinfo); | 2564 | kmem_cache_free(cachep, grinfo); |
2518 | } | 2565 | } |
2519 | num_meta_group_infos = (ngroups + | 2566 | num_meta_group_infos = (ngroups + |
2520 | EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2567 | EXT4_DESC_PER_BLOCK(sb) - 1) >> |
@@ -2558,7 +2605,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2558 | return 0; | 2605 | return 0; |
2559 | } | 2606 | } |
2560 | 2607 | ||
2561 | static inline void ext4_issue_discard(struct super_block *sb, | 2608 | static inline int ext4_issue_discard(struct super_block *sb, |
2562 | ext4_group_t block_group, ext4_grpblk_t block, int count) | 2609 | ext4_group_t block_group, ext4_grpblk_t block, int count) |
2563 | { | 2610 | { |
2564 | int ret; | 2611 | int ret; |
@@ -2568,10 +2615,11 @@ static inline void ext4_issue_discard(struct super_block *sb, | |||
2568 | trace_ext4_discard_blocks(sb, | 2615 | trace_ext4_discard_blocks(sb, |
2569 | (unsigned long long) discard_block, count); | 2616 | (unsigned long long) discard_block, count); |
2570 | ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); | 2617 | ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); |
2571 | if (ret == EOPNOTSUPP) { | 2618 | if (ret == -EOPNOTSUPP) { |
2572 | ext4_warning(sb, "discard not supported, disabling"); | 2619 | ext4_warning(sb, "discard not supported, disabling"); |
2573 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | 2620 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); |
2574 | } | 2621 | } |
2622 | return ret; | ||
2575 | } | 2623 | } |
2576 | 2624 | ||
2577 | /* | 2625 | /* |
@@ -2659,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void) | |||
2659 | 2707 | ||
2660 | #endif | 2708 | #endif |
2661 | 2709 | ||
2662 | int __init init_ext4_mballoc(void) | 2710 | int __init ext4_init_mballoc(void) |
2663 | { | 2711 | { |
2664 | ext4_pspace_cachep = | 2712 | ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, |
2665 | kmem_cache_create("ext4_prealloc_space", | 2713 | SLAB_RECLAIM_ACCOUNT); |
2666 | sizeof(struct ext4_prealloc_space), | ||
2667 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2668 | if (ext4_pspace_cachep == NULL) | 2714 | if (ext4_pspace_cachep == NULL) |
2669 | return -ENOMEM; | 2715 | return -ENOMEM; |
2670 | 2716 | ||
2671 | ext4_ac_cachep = | 2717 | ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context, |
2672 | kmem_cache_create("ext4_alloc_context", | 2718 | SLAB_RECLAIM_ACCOUNT); |
2673 | sizeof(struct ext4_allocation_context), | ||
2674 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2675 | if (ext4_ac_cachep == NULL) { | 2719 | if (ext4_ac_cachep == NULL) { |
2676 | kmem_cache_destroy(ext4_pspace_cachep); | 2720 | kmem_cache_destroy(ext4_pspace_cachep); |
2677 | return -ENOMEM; | 2721 | return -ENOMEM; |
2678 | } | 2722 | } |
2679 | 2723 | ||
2680 | ext4_free_ext_cachep = | 2724 | ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, |
2681 | kmem_cache_create("ext4_free_block_extents", | 2725 | SLAB_RECLAIM_ACCOUNT); |
2682 | sizeof(struct ext4_free_data), | ||
2683 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2684 | if (ext4_free_ext_cachep == NULL) { | 2726 | if (ext4_free_ext_cachep == NULL) { |
2685 | kmem_cache_destroy(ext4_pspace_cachep); | 2727 | kmem_cache_destroy(ext4_pspace_cachep); |
2686 | kmem_cache_destroy(ext4_ac_cachep); | 2728 | kmem_cache_destroy(ext4_ac_cachep); |
@@ -2690,8 +2732,9 @@ int __init init_ext4_mballoc(void) | |||
2690 | return 0; | 2732 | return 0; |
2691 | } | 2733 | } |
2692 | 2734 | ||
2693 | void exit_ext4_mballoc(void) | 2735 | void ext4_exit_mballoc(void) |
2694 | { | 2736 | { |
2737 | int i; | ||
2695 | /* | 2738 | /* |
2696 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep | 2739 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep |
2697 | * before destroying the slab cache. | 2740 | * before destroying the slab cache. |
@@ -2700,6 +2743,15 @@ void exit_ext4_mballoc(void) | |||
2700 | kmem_cache_destroy(ext4_pspace_cachep); | 2743 | kmem_cache_destroy(ext4_pspace_cachep); |
2701 | kmem_cache_destroy(ext4_ac_cachep); | 2744 | kmem_cache_destroy(ext4_ac_cachep); |
2702 | kmem_cache_destroy(ext4_free_ext_cachep); | 2745 | kmem_cache_destroy(ext4_free_ext_cachep); |
2746 | |||
2747 | for (i = 0; i < NR_GRPINFO_CACHES; i++) { | ||
2748 | struct kmem_cache *cachep = ext4_groupinfo_caches[i]; | ||
2749 | if (cachep) { | ||
2750 | char *name = (char *)kmem_cache_name(cachep); | ||
2751 | kmem_cache_destroy(cachep); | ||
2752 | kfree(name); | ||
2753 | } | ||
2754 | } | ||
2703 | ext4_remove_debugfs_entry(); | 2755 | ext4_remove_debugfs_entry(); |
2704 | } | 2756 | } |
2705 | 2757 | ||
@@ -3536,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) | |||
3536 | */ | 3588 | */ |
3537 | static noinline_for_stack int | 3589 | static noinline_for_stack int |
3538 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | 3590 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, |
3539 | struct ext4_prealloc_space *pa, | 3591 | struct ext4_prealloc_space *pa) |
3540 | struct ext4_allocation_context *ac) | ||
3541 | { | 3592 | { |
3542 | struct super_block *sb = e4b->bd_sb; | 3593 | struct super_block *sb = e4b->bd_sb; |
3543 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3594 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -3555,11 +3606,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3555 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3606 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3556 | end = bit + pa->pa_len; | 3607 | end = bit + pa->pa_len; |
3557 | 3608 | ||
3558 | if (ac) { | ||
3559 | ac->ac_sb = sb; | ||
3560 | ac->ac_inode = pa->pa_inode; | ||
3561 | } | ||
3562 | |||
3563 | while (bit < end) { | 3609 | while (bit < end) { |
3564 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); | 3610 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); |
3565 | if (bit >= end) | 3611 | if (bit >= end) |
@@ -3570,16 +3616,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3570 | (unsigned) next - bit, (unsigned) group); | 3616 | (unsigned) next - bit, (unsigned) group); |
3571 | free += next - bit; | 3617 | free += next - bit; |
3572 | 3618 | ||
3573 | if (ac) { | 3619 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); |
3574 | ac->ac_b_ex.fe_group = group; | 3620 | trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa, |
3575 | ac->ac_b_ex.fe_start = bit; | 3621 | grp_blk_start + bit, next - bit); |
3576 | ac->ac_b_ex.fe_len = next - bit; | ||
3577 | ac->ac_b_ex.fe_logical = 0; | ||
3578 | trace_ext4_mballoc_discard(ac); | ||
3579 | } | ||
3580 | |||
3581 | trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit, | ||
3582 | next - bit); | ||
3583 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3622 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3584 | bit = next + 1; | 3623 | bit = next + 1; |
3585 | } | 3624 | } |
@@ -3602,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3602 | 3641 | ||
3603 | static noinline_for_stack int | 3642 | static noinline_for_stack int |
3604 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, | 3643 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, |
3605 | struct ext4_prealloc_space *pa, | 3644 | struct ext4_prealloc_space *pa) |
3606 | struct ext4_allocation_context *ac) | ||
3607 | { | 3645 | { |
3608 | struct super_block *sb = e4b->bd_sb; | 3646 | struct super_block *sb = e4b->bd_sb; |
3609 | ext4_group_t group; | 3647 | ext4_group_t group; |
3610 | ext4_grpblk_t bit; | 3648 | ext4_grpblk_t bit; |
3611 | 3649 | ||
3612 | trace_ext4_mb_release_group_pa(sb, ac, pa); | 3650 | trace_ext4_mb_release_group_pa(sb, pa); |
3613 | BUG_ON(pa->pa_deleted == 0); | 3651 | BUG_ON(pa->pa_deleted == 0); |
3614 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3652 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3615 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3653 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3616 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); | 3654 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); |
3617 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); | 3655 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); |
3618 | 3656 | trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); | |
3619 | if (ac) { | ||
3620 | ac->ac_sb = sb; | ||
3621 | ac->ac_inode = NULL; | ||
3622 | ac->ac_b_ex.fe_group = group; | ||
3623 | ac->ac_b_ex.fe_start = bit; | ||
3624 | ac->ac_b_ex.fe_len = pa->pa_len; | ||
3625 | ac->ac_b_ex.fe_logical = 0; | ||
3626 | trace_ext4_mballoc_discard(ac); | ||
3627 | } | ||
3628 | 3657 | ||
3629 | return 0; | 3658 | return 0; |
3630 | } | 3659 | } |
@@ -3645,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3645 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 3674 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
3646 | struct buffer_head *bitmap_bh = NULL; | 3675 | struct buffer_head *bitmap_bh = NULL; |
3647 | struct ext4_prealloc_space *pa, *tmp; | 3676 | struct ext4_prealloc_space *pa, *tmp; |
3648 | struct ext4_allocation_context *ac; | ||
3649 | struct list_head list; | 3677 | struct list_head list; |
3650 | struct ext4_buddy e4b; | 3678 | struct ext4_buddy e4b; |
3651 | int err; | 3679 | int err; |
@@ -3674,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3674 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; | 3702 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; |
3675 | 3703 | ||
3676 | INIT_LIST_HEAD(&list); | 3704 | INIT_LIST_HEAD(&list); |
3677 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3678 | if (ac) | ||
3679 | ac->ac_sb = sb; | ||
3680 | repeat: | 3705 | repeat: |
3681 | ext4_lock_group(sb, group); | 3706 | ext4_lock_group(sb, group); |
3682 | list_for_each_entry_safe(pa, tmp, | 3707 | list_for_each_entry_safe(pa, tmp, |
@@ -3731,9 +3756,9 @@ repeat: | |||
3731 | spin_unlock(pa->pa_obj_lock); | 3756 | spin_unlock(pa->pa_obj_lock); |
3732 | 3757 | ||
3733 | if (pa->pa_type == MB_GROUP_PA) | 3758 | if (pa->pa_type == MB_GROUP_PA) |
3734 | ext4_mb_release_group_pa(&e4b, pa, ac); | 3759 | ext4_mb_release_group_pa(&e4b, pa); |
3735 | else | 3760 | else |
3736 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3761 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3737 | 3762 | ||
3738 | list_del(&pa->u.pa_tmp_list); | 3763 | list_del(&pa->u.pa_tmp_list); |
3739 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3764 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
@@ -3741,8 +3766,6 @@ repeat: | |||
3741 | 3766 | ||
3742 | out: | 3767 | out: |
3743 | ext4_unlock_group(sb, group); | 3768 | ext4_unlock_group(sb, group); |
3744 | if (ac) | ||
3745 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3746 | ext4_mb_unload_buddy(&e4b); | 3769 | ext4_mb_unload_buddy(&e4b); |
3747 | put_bh(bitmap_bh); | 3770 | put_bh(bitmap_bh); |
3748 | return free; | 3771 | return free; |
@@ -3763,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3763 | struct super_block *sb = inode->i_sb; | 3786 | struct super_block *sb = inode->i_sb; |
3764 | struct buffer_head *bitmap_bh = NULL; | 3787 | struct buffer_head *bitmap_bh = NULL; |
3765 | struct ext4_prealloc_space *pa, *tmp; | 3788 | struct ext4_prealloc_space *pa, *tmp; |
3766 | struct ext4_allocation_context *ac; | ||
3767 | ext4_group_t group = 0; | 3789 | ext4_group_t group = 0; |
3768 | struct list_head list; | 3790 | struct list_head list; |
3769 | struct ext4_buddy e4b; | 3791 | struct ext4_buddy e4b; |
@@ -3779,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3779 | 3801 | ||
3780 | INIT_LIST_HEAD(&list); | 3802 | INIT_LIST_HEAD(&list); |
3781 | 3803 | ||
3782 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3783 | if (ac) { | ||
3784 | ac->ac_sb = sb; | ||
3785 | ac->ac_inode = inode; | ||
3786 | } | ||
3787 | repeat: | 3804 | repeat: |
3788 | /* first, collect all pa's in the inode */ | 3805 | /* first, collect all pa's in the inode */ |
3789 | spin_lock(&ei->i_prealloc_lock); | 3806 | spin_lock(&ei->i_prealloc_lock); |
@@ -3853,7 +3870,7 @@ repeat: | |||
3853 | 3870 | ||
3854 | ext4_lock_group(sb, group); | 3871 | ext4_lock_group(sb, group); |
3855 | list_del(&pa->pa_group_list); | 3872 | list_del(&pa->pa_group_list); |
3856 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3873 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3857 | ext4_unlock_group(sb, group); | 3874 | ext4_unlock_group(sb, group); |
3858 | 3875 | ||
3859 | ext4_mb_unload_buddy(&e4b); | 3876 | ext4_mb_unload_buddy(&e4b); |
@@ -3862,8 +3879,6 @@ repeat: | |||
3862 | list_del(&pa->u.pa_tmp_list); | 3879 | list_del(&pa->u.pa_tmp_list); |
3863 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3880 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
3864 | } | 3881 | } |
3865 | if (ac) | ||
3866 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3867 | } | 3882 | } |
3868 | 3883 | ||
3869 | /* | 3884 | /* |
@@ -4061,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4061 | struct ext4_buddy e4b; | 4076 | struct ext4_buddy e4b; |
4062 | struct list_head discard_list; | 4077 | struct list_head discard_list; |
4063 | struct ext4_prealloc_space *pa, *tmp; | 4078 | struct ext4_prealloc_space *pa, *tmp; |
4064 | struct ext4_allocation_context *ac; | ||
4065 | 4079 | ||
4066 | mb_debug(1, "discard locality group preallocation\n"); | 4080 | mb_debug(1, "discard locality group preallocation\n"); |
4067 | 4081 | ||
4068 | INIT_LIST_HEAD(&discard_list); | 4082 | INIT_LIST_HEAD(&discard_list); |
4069 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4070 | if (ac) | ||
4071 | ac->ac_sb = sb; | ||
4072 | 4083 | ||
4073 | spin_lock(&lg->lg_prealloc_lock); | 4084 | spin_lock(&lg->lg_prealloc_lock); |
4074 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], | 4085 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], |
@@ -4120,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4120 | } | 4131 | } |
4121 | ext4_lock_group(sb, group); | 4132 | ext4_lock_group(sb, group); |
4122 | list_del(&pa->pa_group_list); | 4133 | list_del(&pa->pa_group_list); |
4123 | ext4_mb_release_group_pa(&e4b, pa, ac); | 4134 | ext4_mb_release_group_pa(&e4b, pa); |
4124 | ext4_unlock_group(sb, group); | 4135 | ext4_unlock_group(sb, group); |
4125 | 4136 | ||
4126 | ext4_mb_unload_buddy(&e4b); | 4137 | ext4_mb_unload_buddy(&e4b); |
4127 | list_del(&pa->u.pa_tmp_list); | 4138 | list_del(&pa->u.pa_tmp_list); |
4128 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 4139 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
4129 | } | 4140 | } |
4130 | if (ac) | ||
4131 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4132 | } | 4141 | } |
4133 | 4142 | ||
4134 | /* | 4143 | /* |
@@ -4492,7 +4501,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4492 | { | 4501 | { |
4493 | struct buffer_head *bitmap_bh = NULL; | 4502 | struct buffer_head *bitmap_bh = NULL; |
4494 | struct super_block *sb = inode->i_sb; | 4503 | struct super_block *sb = inode->i_sb; |
4495 | struct ext4_allocation_context *ac = NULL; | ||
4496 | struct ext4_group_desc *gdp; | 4504 | struct ext4_group_desc *gdp; |
4497 | unsigned long freed = 0; | 4505 | unsigned long freed = 0; |
4498 | unsigned int overflow; | 4506 | unsigned int overflow; |
@@ -4532,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4532 | if (!bh) | 4540 | if (!bh) |
4533 | tbh = sb_find_get_block(inode->i_sb, | 4541 | tbh = sb_find_get_block(inode->i_sb, |
4534 | block + i); | 4542 | block + i); |
4543 | if (unlikely(!tbh)) | ||
4544 | continue; | ||
4535 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4545 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
4536 | inode, tbh, block + i); | 4546 | inode, tbh, block + i); |
4537 | } | 4547 | } |
@@ -4547,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4547 | if (!ext4_should_writeback_data(inode)) | 4557 | if (!ext4_should_writeback_data(inode)) |
4548 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4558 | flags |= EXT4_FREE_BLOCKS_METADATA; |
4549 | 4559 | ||
4550 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4551 | if (ac) { | ||
4552 | ac->ac_inode = inode; | ||
4553 | ac->ac_sb = sb; | ||
4554 | } | ||
4555 | |||
4556 | do_more: | 4560 | do_more: |
4557 | overflow = 0; | 4561 | overflow = 0; |
4558 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 4562 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
@@ -4610,12 +4614,7 @@ do_more: | |||
4610 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | 4614 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); |
4611 | } | 4615 | } |
4612 | #endif | 4616 | #endif |
4613 | if (ac) { | 4617 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count); |
4614 | ac->ac_b_ex.fe_group = block_group; | ||
4615 | ac->ac_b_ex.fe_start = bit; | ||
4616 | ac->ac_b_ex.fe_len = count; | ||
4617 | trace_ext4_mballoc_free(ac); | ||
4618 | } | ||
4619 | 4618 | ||
4620 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | 4619 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
4621 | if (err) | 4620 | if (err) |
@@ -4641,12 +4640,12 @@ do_more: | |||
4641 | * with group lock held. generate_buddy look at | 4640 | * with group lock held. generate_buddy look at |
4642 | * them with group lock_held | 4641 | * them with group lock_held |
4643 | */ | 4642 | */ |
4643 | if (test_opt(sb, DISCARD)) | ||
4644 | ext4_issue_discard(sb, block_group, bit, count); | ||
4644 | ext4_lock_group(sb, block_group); | 4645 | ext4_lock_group(sb, block_group); |
4645 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4646 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4646 | mb_free_blocks(inode, &e4b, bit, count); | 4647 | mb_free_blocks(inode, &e4b, bit, count); |
4647 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | 4648 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); |
4648 | if (test_opt(sb, DISCARD)) | ||
4649 | ext4_issue_discard(sb, block_group, bit, count); | ||
4650 | } | 4649 | } |
4651 | 4650 | ||
4652 | ret = ext4_free_blks_count(sb, gdp) + count; | 4651 | ret = ext4_free_blks_count(sb, gdp) + count; |
@@ -4686,7 +4685,190 @@ error_return: | |||
4686 | dquot_free_block(inode, freed); | 4685 | dquot_free_block(inode, freed); |
4687 | brelse(bitmap_bh); | 4686 | brelse(bitmap_bh); |
4688 | ext4_std_error(sb, err); | 4687 | ext4_std_error(sb, err); |
4689 | if (ac) | ||
4690 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4691 | return; | 4688 | return; |
4692 | } | 4689 | } |
4690 | |||
4691 | /** | ||
4692 | * ext4_trim_extent -- function to TRIM one single free extent in the group | ||
4693 | * @sb: super block for the file system | ||
4694 | * @start: starting block of the free extent in the alloc. group | ||
4695 | * @count: number of blocks to TRIM | ||
4696 | * @group: alloc. group we are working with | ||
4697 | * @e4b: ext4 buddy for the group | ||
4698 | * | ||
4699 | * Trim "count" blocks starting at "start" in the "group". To ensure that no | |
4700 | * one will allocate those blocks, mark them as used in buddy bitmap. This must | |
4701 | * be called under the group lock. | |
4702 | */ | ||
4703 | static int ext4_trim_extent(struct super_block *sb, int start, int count, | ||
4704 | ext4_group_t group, struct ext4_buddy *e4b) | ||
4705 | { | ||
4706 | struct ext4_free_extent ex; | ||
4707 | int ret = 0; | ||
4708 | |||
4709 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); | ||
4710 | |||
4711 | ex.fe_start = start; | ||
4712 | ex.fe_group = group; | ||
4713 | ex.fe_len = count; | ||
4714 | |||
4715 | /* | ||
4716 | * Mark blocks used, so no one can reuse them while | ||
4717 | * being trimmed. | ||
4718 | */ | ||
4719 | mb_mark_used(e4b, &ex); | ||
4720 | ext4_unlock_group(sb, group); | ||
4721 | |||
4722 | ret = ext4_issue_discard(sb, group, start, count); | ||
4723 | if (ret) | ||
4724 | ext4_std_error(sb, ret); | ||
4725 | |||
4726 | ext4_lock_group(sb, group); | ||
4727 | mb_free_blocks(NULL, e4b, start, ex.fe_len); | ||
4728 | return ret; | ||
4729 | } | ||
4730 | |||
4731 | /** | ||
4732 | * ext4_trim_all_free -- function to trim all free space in alloc. group | ||
4733 | * @sb: super block for file system | ||
4734 | * @e4b: ext4 buddy | ||
4735 | * @start: first group block to examine | ||
4736 | * @max: last group block to examine | ||
4737 | * @minblocks: minimum extent block count | ||
4738 | * | ||
4739 | * ext4_trim_all_free walks through the group's buddy bitmap searching for free | |
4740 | * extents. When a free extent is found, ext4_trim_extent is called to TRIM | |
4741 | * the extent. | |
4742 | * | ||
4743 | * | ||
4744 | * ext4_trim_all_free walks through group's block bitmap searching for free | ||
4745 | * extents. When the free extent is found, mark it as used in group buddy | ||
4746 | * bitmap. Then issue a TRIM command on this extent and free the extent in | ||
4747 | * the group buddy bitmap. This is done until whole group is scanned. | ||
4748 | */ | ||
4749 | ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | ||
4750 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) | ||
4751 | { | ||
4752 | void *bitmap; | ||
4753 | ext4_grpblk_t next, count = 0; | ||
4754 | ext4_group_t group; | ||
4755 | int ret = 0; | ||
4756 | |||
4757 | BUG_ON(e4b == NULL); | ||
4758 | |||
4759 | bitmap = e4b->bd_bitmap; | ||
4760 | group = e4b->bd_group; | ||
4761 | start = (e4b->bd_info->bb_first_free > start) ? | ||
4762 | e4b->bd_info->bb_first_free : start; | ||
4763 | ext4_lock_group(sb, group); | ||
4764 | |||
4765 | while (start < max) { | ||
4766 | start = mb_find_next_zero_bit(bitmap, max, start); | ||
4767 | if (start >= max) | ||
4768 | break; | ||
4769 | next = mb_find_next_bit(bitmap, max, start); | ||
4770 | |||
4771 | if ((next - start) >= minblocks) { | ||
4772 | ret = ext4_trim_extent(sb, start, | ||
4773 | next - start, group, e4b); | ||
4774 | if (ret < 0) | ||
4775 | break; | ||
4776 | count += next - start; | ||
4777 | } | ||
4778 | start = next + 1; | ||
4779 | |||
4780 | if (fatal_signal_pending(current)) { | ||
4781 | count = -ERESTARTSYS; | ||
4782 | break; | ||
4783 | } | ||
4784 | |||
4785 | if (need_resched()) { | ||
4786 | ext4_unlock_group(sb, group); | ||
4787 | cond_resched(); | ||
4788 | ext4_lock_group(sb, group); | ||
4789 | } | ||
4790 | |||
4791 | if ((e4b->bd_info->bb_free - count) < minblocks) | ||
4792 | break; | ||
4793 | } | ||
4794 | ext4_unlock_group(sb, group); | ||
4795 | |||
4796 | ext4_debug("trimmed %d blocks in the group %d\n", | ||
4797 | count, group); | ||
4798 | |||
4799 | if (ret < 0) | ||
4800 | count = ret; | ||
4801 | |||
4802 | return count; | ||
4803 | } | ||
4804 | |||
4805 | /** | ||
4806 | * ext4_trim_fs() -- trim ioctl handle function | ||
4807 | * @sb: superblock for filesystem | ||
4808 | * @range: fstrim_range structure | ||
4809 | * | ||
4810 | * start: First Byte to trim | ||
4811 | * len: number of Bytes to trim from start | ||
4812 | * minlen: minimum extent length in Bytes | ||
4813 | * ext4_trim_fs goes through all allocation groups containing Bytes from | |
4814 | * start to start+len. For each such group, the ext4_trim_all_free function | |
4815 | * is invoked to trim all free space. | |
4816 | */ | ||
4817 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | ||
4818 | { | ||
4819 | struct ext4_buddy e4b; | ||
4820 | ext4_group_t first_group, last_group; | ||
4821 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | ||
4822 | ext4_grpblk_t cnt = 0, first_block, last_block; | ||
4823 | uint64_t start, len, minlen, trimmed; | ||
4824 | int ret = 0; | ||
4825 | |||
4826 | start = range->start >> sb->s_blocksize_bits; | ||
4827 | len = range->len >> sb->s_blocksize_bits; | ||
4828 | minlen = range->minlen >> sb->s_blocksize_bits; | ||
4829 | trimmed = 0; | ||
4830 | |||
4831 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) | ||
4832 | return -EINVAL; | ||
4833 | |||
4834 | /* Determine first and last group to examine based on start and len */ | ||
4835 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, | ||
4836 | &first_group, &first_block); | ||
4837 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), | ||
4838 | &last_group, &last_block); | ||
4839 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; | ||
4840 | last_block = EXT4_BLOCKS_PER_GROUP(sb); | ||
4841 | |||
4842 | if (first_group > last_group) | ||
4843 | return -EINVAL; | ||
4844 | |||
4845 | for (group = first_group; group <= last_group; group++) { | ||
4846 | ret = ext4_mb_load_buddy(sb, group, &e4b); | ||
4847 | if (ret) { | ||
4848 | ext4_error(sb, "Error in loading buddy " | ||
4849 | "information for %u", group); | ||
4850 | break; | ||
4851 | } | ||
4852 | |||
4853 | if (len >= EXT4_BLOCKS_PER_GROUP(sb)) | ||
4854 | len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block); | ||
4855 | else | ||
4856 | last_block = len; | ||
4857 | |||
4858 | if (e4b.bd_info->bb_free >= minlen) { | ||
4859 | cnt = ext4_trim_all_free(sb, &e4b, first_block, | ||
4860 | last_block, minlen); | ||
4861 | if (cnt < 0) { | ||
4862 | ret = cnt; | ||
4863 | ext4_mb_unload_buddy(&e4b); | ||
4864 | break; | ||
4865 | } | ||
4866 | } | ||
4867 | ext4_mb_unload_buddy(&e4b); | ||
4868 | trimmed += cnt; | ||
4869 | first_block = 0; | ||
4870 | } | ||
4871 | range->len = trimmed * sb->s_blocksize; | ||
4872 | |||
4873 | return ret; | ||
4874 | } | ||