Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 893 |
1 file changed, 579 insertions, 314 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4b4ad4b7ce57..6ed859d56850 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -92,7 +92,7 @@ | |||
92 | * between CPUs. It is possible to get scheduled at this point. | 92 | * between CPUs. It is possible to get scheduled at this point. |
93 | * | 93 | * |
94 | * The locality group prealloc space is used looking at whether we have | 94 | * The locality group prealloc space is used looking at whether we have |
95 | * enough free space (pa_free) withing the prealloc space. | 95 | * enough free space (pa_free) within the prealloc space. |
96 | * | 96 | * |
97 | * If we can't allocate blocks via inode prealloc and/or locality group | 97 | * If we can't allocate blocks via inode prealloc and/or locality group |
98 | * prealloc then we look at the buddy cache. The buddy cache is represented | 98 | * prealloc then we look at the buddy cache. The buddy cache is represented |
@@ -338,6 +338,19 @@ | |||
338 | static struct kmem_cache *ext4_pspace_cachep; | 338 | static struct kmem_cache *ext4_pspace_cachep; |
339 | static struct kmem_cache *ext4_ac_cachep; | 339 | static struct kmem_cache *ext4_ac_cachep; |
340 | static struct kmem_cache *ext4_free_ext_cachep; | 340 | static struct kmem_cache *ext4_free_ext_cachep; |
341 | |||
342 | /* We create slab caches for groupinfo data structures based on the | ||
343 | * superblock block size. There will be one per mounted filesystem for | ||
344 | * each unique s_blocksize_bits */ | ||
345 | #define NR_GRPINFO_CACHES 8 | ||
346 | static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; | ||
347 | |||
348 | static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = { | ||
349 | "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k", | ||
350 | "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k", | ||
351 | "ext4_groupinfo_64k", "ext4_groupinfo_128k" | ||
352 | }; | ||
353 | |||
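The cache index used with this table is simply s_blocksize_bits minus EXT4_MIN_BLOCK_LOG_SIZE (10, i.e. 1k blocks, in this era's ext4.h), so each supported block size from 1k to 128k gets its own slab. A minimal userspace sketch of that mapping, assuming only the two constants shown:

        #include <stdio.h>

        #define EXT4_MIN_BLOCK_LOG_SIZE 10      /* 1k blocks */
        #define NR_GRPINFO_CACHES 8

        static const char *slab_names[NR_GRPINFO_CACHES] = {
                "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
                "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
                "ext4_groupinfo_64k", "ext4_groupinfo_128k"
        };

        int main(void)
        {
                int bits;

                /* s_blocksize_bits runs from 10 (1k) to 17 (128k) */
                for (bits = 10; bits < 10 + NR_GRPINFO_CACHES; bits++)
                        printf("s_blocksize_bits=%d -> %s\n", bits,
                               slab_names[bits - EXT4_MIN_BLOCK_LOG_SIZE]);
                return 0;
        }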
341 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 354 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
342 | ext4_group_t group); | 355 | ext4_group_t group); |
343 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 356 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
@@ -419,9 +432,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) | |||
419 | } | 432 | } |
420 | 433 | ||
421 | /* at order 0 we see each particular block */ | 434 | /* at order 0 we see each particular block */ |
422 | *max = 1 << (e4b->bd_blkbits + 3); | 435 | if (order == 0) { |
423 | if (order == 0) | 436 | *max = 1 << (e4b->bd_blkbits + 3); |
424 | return EXT4_MB_BITMAP(e4b); | 437 | return EXT4_MB_BITMAP(e4b); |
438 | } | ||
425 | 439 | ||
426 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; | 440 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; |
427 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; | 441 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; |
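For reference, the order-0 "buddy" is the block bitmap itself, one bit per block of the group, so its size in bits is 8 * blocksize, which is exactly what 1 << (bd_blkbits + 3) computes. A quick standalone check, assuming a 4k block size as the example value:

        #include <assert.h>

        int main(void)
        {
                int bd_blkbits = 12;               /* 4k filesystem block */
                int max = 1 << (bd_blkbits + 3);   /* bits in the block bitmap */

                assert(max == 4096 * 8);           /* 32768 bits, one per block */
                return 0;
        }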
@@ -603,7 +617,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
603 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); | 617 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); |
604 | 618 | ||
605 | grp = ext4_get_group_info(sb, e4b->bd_group); | 619 | grp = ext4_get_group_info(sb, e4b->bd_group); |
606 | buddy = mb_find_buddy(e4b, 0, &max); | ||
607 | list_for_each(cur, &grp->bb_prealloc_list) { | 620 | list_for_each(cur, &grp->bb_prealloc_list) { |
608 | ext4_group_t groupnr; | 621 | ext4_group_t groupnr; |
609 | struct ext4_prealloc_space *pa; | 622 | struct ext4_prealloc_space *pa; |
@@ -622,7 +635,12 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
622 | #define mb_check_buddy(e4b) | 635 | #define mb_check_buddy(e4b) |
623 | #endif | 636 | #endif |
624 | 637 | ||
625 | /* FIXME!! need more doc */ | 638 | /* |
639 | * Divide the blocks starting at @first with length @len into | ||
640 | * smaller chunks with power-of-2 block counts. | ||
641 | * Clear the bits in the bitmap covered by the blocks of the chunk(s), | ||
642 | * then increase bb_counters[] for the corresponding chunk size. | ||
643 | */ | ||
626 | static void ext4_mb_mark_free_simple(struct super_block *sb, | 644 | static void ext4_mb_mark_free_simple(struct super_block *sb, |
627 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, | 645 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, |
628 | struct ext4_group_info *grp) | 646 | struct ext4_group_info *grp) |
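The chunking rule the new comment describes can be sketched outside the kernel: at each step take the largest power-of-2 run that is both aligned at @first and no longer than the remaining length. This is a simplified sketch only; the real function additionally caps the order at the bitmap boundary and bumps bb_counters[] per chunk:

        #include <stdio.h>
        #include <strings.h>        /* ffs() */

        /* Split [first, first + len) into the largest power-of-2 runs
         * that stay aligned at 'first'. */
        static void mark_free_simple(unsigned int first, unsigned int len)
        {
                while (len > 0) {
                        /* largest order allowed by the alignment of 'first' */
                        int max = first ? ffs(first) - 1 : 31;
                        /* largest order allowed by the remaining length */
                        int min = 31;

                        while (!(len >> min))
                                min--;
                        if (max < min)
                                min = max;
                        printf("chunk at %u: order %d (%u blocks)\n",
                               first, min, 1u << min);
                        first += 1u << min;
                        len -= 1u << min;
                }
        }

        int main(void)
        {
                mark_free_simple(5, 11);        /* 5..15 -> orders 0, 1, 3 */
                return 0;
        }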
@@ -769,6 +787,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
769 | struct inode *inode; | 787 | struct inode *inode; |
770 | char *data; | 788 | char *data; |
771 | char *bitmap; | 789 | char *bitmap; |
790 | struct ext4_group_info *grinfo; | ||
772 | 791 | ||
773 | mb_debug(1, "init page %lu\n", page->index); | 792 | mb_debug(1, "init page %lu\n", page->index); |
774 | 793 | ||
@@ -801,6 +820,18 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
801 | if (first_group + i >= ngroups) | 820 | if (first_group + i >= ngroups) |
802 | break; | 821 | break; |
803 | 822 | ||
823 | grinfo = ext4_get_group_info(sb, first_group + i); | ||
824 | /* | ||
825 | * If page is uptodate then we came here after online resize | ||
826 | * which added some new uninitialized group info structs, so | ||
827 | * we must skip all initialized uptodate buddies on the page, | ||
828 | * which may be currently in use by an allocating task. | ||
829 | */ | ||
830 | if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) { | ||
831 | bh[i] = NULL; | ||
832 | continue; | ||
833 | } | ||
834 | |||
804 | err = -EIO; | 835 | err = -EIO; |
805 | desc = ext4_get_group_desc(sb, first_group + i, NULL); | 836 | desc = ext4_get_group_desc(sb, first_group + i, NULL); |
806 | if (desc == NULL) | 837 | if (desc == NULL) |
@@ -853,26 +884,28 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
853 | } | 884 | } |
854 | 885 | ||
855 | /* wait for I/O completion */ | 886 | /* wait for I/O completion */ |
856 | for (i = 0; i < groups_per_page && bh[i]; i++) | 887 | for (i = 0; i < groups_per_page; i++) |
857 | wait_on_buffer(bh[i]); | 888 | if (bh[i]) |
889 | wait_on_buffer(bh[i]); | ||
858 | 890 | ||
859 | err = -EIO; | 891 | err = -EIO; |
860 | for (i = 0; i < groups_per_page && bh[i]; i++) | 892 | for (i = 0; i < groups_per_page; i++) |
861 | if (!buffer_uptodate(bh[i])) | 893 | if (bh[i] && !buffer_uptodate(bh[i])) |
862 | goto out; | 894 | goto out; |
863 | 895 | ||
864 | err = 0; | 896 | err = 0; |
865 | first_block = page->index * blocks_per_page; | 897 | first_block = page->index * blocks_per_page; |
866 | /* init the page */ | ||
867 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); | ||
868 | for (i = 0; i < blocks_per_page; i++) { | 898 | for (i = 0; i < blocks_per_page; i++) { |
869 | int group; | 899 | int group; |
870 | struct ext4_group_info *grinfo; | ||
871 | 900 | ||
872 | group = (first_block + i) >> 1; | 901 | group = (first_block + i) >> 1; |
873 | if (group >= ngroups) | 902 | if (group >= ngroups) |
874 | break; | 903 | break; |
875 | 904 | ||
905 | if (!bh[group - first_group]) | ||
906 | /* skip initialized uptodate buddy */ | ||
907 | continue; | ||
908 | |||
876 | /* | 909 | /* |
877 | * data carry information regarding this | 910 | * data carry information regarding this |
878 | * particular group in the format specified | 911 | * particular group in the format specified |
@@ -901,6 +934,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
901 | * incore got set to the group block bitmap below | 934 | * incore got set to the group block bitmap below |
902 | */ | 935 | */ |
903 | ext4_lock_group(sb, group); | 936 | ext4_lock_group(sb, group); |
937 | /* init the buddy */ | ||
938 | memset(data, 0xff, blocksize); | ||
904 | ext4_mb_generate_buddy(sb, data, incore, group); | 939 | ext4_mb_generate_buddy(sb, data, incore, group); |
905 | ext4_unlock_group(sb, group); | 940 | ext4_unlock_group(sb, group); |
906 | incore = NULL; | 941 | incore = NULL; |
@@ -930,7 +965,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
930 | 965 | ||
931 | out: | 966 | out: |
932 | if (bh) { | 967 | if (bh) { |
933 | for (i = 0; i < groups_per_page && bh[i]; i++) | 968 | for (i = 0; i < groups_per_page; i++) |
934 | brelse(bh[i]); | 969 | brelse(bh[i]); |
935 | if (bh != &bhs) | 970 | if (bh != &bhs) |
936 | kfree(bh); | 971 | kfree(bh); |
@@ -939,6 +974,67 @@ out: | |||
939 | } | 974 | } |
940 | 975 | ||
941 | /* | 976 | /* |
977 | * Lock the buddy and bitmap pages. This makes sure a parallel init_group | ||
978 | * on the same buddy page cannot happen while we hold the buddy page lock. | ||
979 | * Return the locked buddy and bitmap pages in the e4b struct. If the buddy | ||
980 | * and bitmap are on the same page, e4b->bd_buddy_page is NULL and the return value is 0. | ||
981 | */ | ||
982 | static int ext4_mb_get_buddy_page_lock(struct super_block *sb, | ||
983 | ext4_group_t group, struct ext4_buddy *e4b) | ||
984 | { | ||
985 | struct inode *inode = EXT4_SB(sb)->s_buddy_cache; | ||
986 | int block, pnum, poff; | ||
987 | int blocks_per_page; | ||
988 | struct page *page; | ||
989 | |||
990 | e4b->bd_buddy_page = NULL; | ||
991 | e4b->bd_bitmap_page = NULL; | ||
992 | |||
993 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
994 | /* | ||
995 | * the buddy cache inode stores the block bitmap | ||
996 | * and buddy information in consecutive blocks. | ||
997 | * So for each group we need two blocks. | ||
998 | */ | ||
999 | block = group * 2; | ||
1000 | pnum = block / blocks_per_page; | ||
1001 | poff = block % blocks_per_page; | ||
1002 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1003 | if (!page) | ||
1004 | return -EIO; | ||
1005 | BUG_ON(page->mapping != inode->i_mapping); | ||
1006 | e4b->bd_bitmap_page = page; | ||
1007 | e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1008 | |||
1009 | if (blocks_per_page >= 2) { | ||
1010 | /* buddy and bitmap are on the same page */ | ||
1011 | return 0; | ||
1012 | } | ||
1013 | |||
1014 | block++; | ||
1015 | pnum = block / blocks_per_page; | ||
1016 | poff = block % blocks_per_page; | ||
1017 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1018 | if (!page) | ||
1019 | return -EIO; | ||
1020 | BUG_ON(page->mapping != inode->i_mapping); | ||
1021 | e4b->bd_buddy_page = page; | ||
1022 | return 0; | ||
1023 | } | ||
1024 | |||
1025 | static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b) | ||
1026 | { | ||
1027 | if (e4b->bd_bitmap_page) { | ||
1028 | unlock_page(e4b->bd_bitmap_page); | ||
1029 | page_cache_release(e4b->bd_bitmap_page); | ||
1030 | } | ||
1031 | if (e4b->bd_buddy_page) { | ||
1032 | unlock_page(e4b->bd_buddy_page); | ||
1033 | page_cache_release(e4b->bd_buddy_page); | ||
1034 | } | ||
1035 | } | ||
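The two-blocks-per-group layout described in ext4_mb_get_buddy_page_lock() above maps a group number to a page and offset in the buddy cache inode. A small worked example of the math, assuming 4k pages and 1k filesystem blocks:

        #include <stdio.h>

        /* Each group stores its block bitmap and buddy in two
         * consecutive blocks of the buddy cache inode. */
        int main(void)
        {
                unsigned int blocks_per_page = 4096 / 1024;     /* 4 */
                unsigned int group;

                for (group = 0; group < 4; group++) {
                        unsigned int block = group * 2;         /* bitmap block */

                        printf("group %u: bitmap page %u off %u, buddy page %u off %u\n",
                               group,
                               block / blocks_per_page, block % blocks_per_page,
                               (block + 1) / blocks_per_page,
                               (block + 1) % blocks_per_page);
                }
                return 0;
        }

With blocks_per_page >= 2 the two blocks always share a page, which is why the function can return early with bd_buddy_page left NULL; and since both page pointers are set to NULL up front, ext4_mb_put_buddy_page_lock() is safe to call even after a partial failure.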
1036 | |||
1037 | /* | ||
942 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | 1038 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the |
943 | * block group lock of all groups for this page; do not hold the BG lock when | 1039 | * block group lock of all groups for this page; do not hold the BG lock when |
944 | * calling this routine! | 1040 | * calling this routine! |
@@ -947,93 +1043,60 @@ static noinline_for_stack | |||
947 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | 1043 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) |
948 | { | 1044 | { |
949 | 1045 | ||
950 | int ret = 0; | ||
951 | void *bitmap; | ||
952 | int blocks_per_page; | ||
953 | int block, pnum, poff; | ||
954 | int num_grp_locked = 0; | ||
955 | struct ext4_group_info *this_grp; | 1046 | struct ext4_group_info *this_grp; |
956 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1047 | struct ext4_buddy e4b; |
957 | struct inode *inode = sbi->s_buddy_cache; | 1048 | struct page *page; |
958 | struct page *page = NULL, *bitmap_page = NULL; | 1049 | int ret = 0; |
959 | 1050 | ||
960 | mb_debug(1, "init group %u\n", group); | 1051 | mb_debug(1, "init group %u\n", group); |
961 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
962 | this_grp = ext4_get_group_info(sb, group); | 1052 | this_grp = ext4_get_group_info(sb, group); |
963 | /* | 1053 | /* |
964 | * This ensures that we don't reinit the buddy cache | 1054 | * This ensures that we don't reinit the buddy cache |
965 | * page which maps to the group from which we are already | 1055 | * page which maps to the group from which we are already |
966 | * allocating. If we are looking at the buddy cache we would | 1056 | * allocating. If we are looking at the buddy cache we would |
967 | * have taken a reference using ext4_mb_load_buddy and that | 1057 | * have taken a reference using ext4_mb_load_buddy and that |
968 | * would have taken the alloc_sem lock. | 1058 | * would have pinned the buddy page in the page cache. |
969 | */ | 1059 | */ |
970 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | 1060 | ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b); |
971 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | 1061 | if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { |
972 | /* | 1062 | /* |
973 | * somebody initialized the group | 1063 | * somebody initialized the group |
974 | * return without doing anything | 1064 | * return without doing anything |
975 | */ | 1065 | */ |
976 | ret = 0; | ||
977 | goto err; | 1066 | goto err; |
978 | } | 1067 | } |
979 | /* | 1068 | |
980 | * the buddy cache inode stores the block bitmap | 1069 | page = e4b.bd_bitmap_page; |
981 | * and buddy information in consecutive blocks. | 1070 | ret = ext4_mb_init_cache(page, NULL); |
982 | * So for each group we need two blocks. | 1071 | if (ret) |
983 | */ | 1072 | goto err; |
984 | block = group * 2; | 1073 | if (!PageUptodate(page)) { |
985 | pnum = block / blocks_per_page; | ||
986 | poff = block % blocks_per_page; | ||
987 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
988 | if (page) { | ||
989 | BUG_ON(page->mapping != inode->i_mapping); | ||
990 | ret = ext4_mb_init_cache(page, NULL); | ||
991 | if (ret) { | ||
992 | unlock_page(page); | ||
993 | goto err; | ||
994 | } | ||
995 | unlock_page(page); | ||
996 | } | ||
997 | if (page == NULL || !PageUptodate(page)) { | ||
998 | ret = -EIO; | 1074 | ret = -EIO; |
999 | goto err; | 1075 | goto err; |
1000 | } | 1076 | } |
1001 | mark_page_accessed(page); | 1077 | mark_page_accessed(page); |
1002 | bitmap_page = page; | ||
1003 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1004 | 1078 | ||
1005 | /* init buddy cache */ | 1079 | if (e4b.bd_buddy_page == NULL) { |
1006 | block++; | ||
1007 | pnum = block / blocks_per_page; | ||
1008 | poff = block % blocks_per_page; | ||
1009 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1010 | if (page == bitmap_page) { | ||
1011 | /* | 1080 | /* |
1012 | * If both the bitmap and buddy are in | 1081 | * If both the bitmap and buddy are in |
1013 | * the same page we don't need to force | 1082 | * the same page we don't need to force |
1014 | * init the buddy | 1083 | * init the buddy |
1015 | */ | 1084 | */ |
1016 | unlock_page(page); | 1085 | ret = 0; |
1017 | } else if (page) { | 1086 | goto err; |
1018 | BUG_ON(page->mapping != inode->i_mapping); | ||
1019 | ret = ext4_mb_init_cache(page, bitmap); | ||
1020 | if (ret) { | ||
1021 | unlock_page(page); | ||
1022 | goto err; | ||
1023 | } | ||
1024 | unlock_page(page); | ||
1025 | } | 1087 | } |
1026 | if (page == NULL || !PageUptodate(page)) { | 1088 | /* init buddy cache */ |
1089 | page = e4b.bd_buddy_page; | ||
1090 | ret = ext4_mb_init_cache(page, e4b.bd_bitmap); | ||
1091 | if (ret) | ||
1092 | goto err; | ||
1093 | if (!PageUptodate(page)) { | ||
1027 | ret = -EIO; | 1094 | ret = -EIO; |
1028 | goto err; | 1095 | goto err; |
1029 | } | 1096 | } |
1030 | mark_page_accessed(page); | 1097 | mark_page_accessed(page); |
1031 | err: | 1098 | err: |
1032 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | 1099 | ext4_mb_put_buddy_page_lock(&e4b); |
1033 | if (bitmap_page) | ||
1034 | page_cache_release(bitmap_page); | ||
1035 | if (page) | ||
1036 | page_cache_release(page); | ||
1037 | return ret; | 1100 | return ret; |
1038 | } | 1101 | } |
1039 | 1102 | ||
@@ -1067,24 +1130,8 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
1067 | e4b->bd_group = group; | 1130 | e4b->bd_group = group; |
1068 | e4b->bd_buddy_page = NULL; | 1131 | e4b->bd_buddy_page = NULL; |
1069 | e4b->bd_bitmap_page = NULL; | 1132 | e4b->bd_bitmap_page = NULL; |
1070 | e4b->alloc_semp = &grp->alloc_sem; | ||
1071 | |||
1072 | /* Take the read lock on the group alloc | ||
1073 | * sem. This would make sure a parallel | ||
1074 | * ext4_mb_init_group happening on other | ||
1075 | * groups mapped by the page is blocked | ||
1076 | * till we are done with allocation | ||
1077 | */ | ||
1078 | repeat_load_buddy: | ||
1079 | down_read(e4b->alloc_semp); | ||
1080 | 1133 | ||
1081 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | 1134 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
1082 | /* we need to check for group need init flag | ||
1083 | * with alloc_semp held so that we can be sure | ||
1084 | * that new blocks didn't get added to the group | ||
1085 | * when we are loading the buddy cache | ||
1086 | */ | ||
1087 | up_read(e4b->alloc_semp); | ||
1088 | /* | 1135 | /* |
1089 | * we need full data about the group | 1136 | * we need full data about the group |
1090 | * to make a good selection | 1137 | * to make a good selection |
@@ -1092,7 +1139,6 @@ repeat_load_buddy: | |||
1092 | ret = ext4_mb_init_group(sb, group); | 1139 | ret = ext4_mb_init_group(sb, group); |
1093 | if (ret) | 1140 | if (ret) |
1094 | return ret; | 1141 | return ret; |
1095 | goto repeat_load_buddy; | ||
1096 | } | 1142 | } |
1097 | 1143 | ||
1098 | /* | 1144 | /* |
@@ -1176,15 +1222,14 @@ repeat_load_buddy: | |||
1176 | return 0; | 1222 | return 0; |
1177 | 1223 | ||
1178 | err: | 1224 | err: |
1225 | if (page) | ||
1226 | page_cache_release(page); | ||
1179 | if (e4b->bd_bitmap_page) | 1227 | if (e4b->bd_bitmap_page) |
1180 | page_cache_release(e4b->bd_bitmap_page); | 1228 | page_cache_release(e4b->bd_bitmap_page); |
1181 | if (e4b->bd_buddy_page) | 1229 | if (e4b->bd_buddy_page) |
1182 | page_cache_release(e4b->bd_buddy_page); | 1230 | page_cache_release(e4b->bd_buddy_page); |
1183 | e4b->bd_buddy = NULL; | 1231 | e4b->bd_buddy = NULL; |
1184 | e4b->bd_bitmap = NULL; | 1232 | e4b->bd_bitmap = NULL; |
1185 | |||
1186 | /* Done with the buddy cache */ | ||
1187 | up_read(e4b->alloc_semp); | ||
1188 | return ret; | 1233 | return ret; |
1189 | } | 1234 | } |
1190 | 1235 | ||
@@ -1194,9 +1239,6 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) | |||
1194 | page_cache_release(e4b->bd_bitmap_page); | 1239 | page_cache_release(e4b->bd_bitmap_page); |
1195 | if (e4b->bd_buddy_page) | 1240 | if (e4b->bd_buddy_page) |
1196 | page_cache_release(e4b->bd_buddy_page); | 1241 | page_cache_release(e4b->bd_buddy_page); |
1197 | /* Done with the buddy cache */ | ||
1198 | if (e4b->alloc_semp) | ||
1199 | up_read(e4b->alloc_semp); | ||
1200 | } | 1242 | } |
1201 | 1243 | ||
1202 | 1244 | ||
@@ -1509,9 +1551,6 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, | |||
1509 | get_page(ac->ac_bitmap_page); | 1551 | get_page(ac->ac_bitmap_page); |
1510 | ac->ac_buddy_page = e4b->bd_buddy_page; | 1552 | ac->ac_buddy_page = e4b->bd_buddy_page; |
1511 | get_page(ac->ac_buddy_page); | 1553 | get_page(ac->ac_buddy_page); |
1512 | /* on allocation we use ac to track the held semaphore */ | ||
1513 | ac->alloc_semp = e4b->alloc_semp; | ||
1514 | e4b->alloc_semp = NULL; | ||
1515 | /* store last allocated for subsequent stream allocation */ | 1554 | /* store last allocated for subsequent stream allocation */ |
1516 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { | 1555 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
1517 | spin_lock(&sbi->s_md_lock); | 1556 | spin_lock(&sbi->s_md_lock); |
@@ -1915,84 +1954,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1915 | return 0; | 1954 | return 0; |
1916 | } | 1955 | } |
1917 | 1956 | ||
1918 | /* | ||
1919 | * lock the group_info alloc_sem of all the groups | ||
1920 | * belonging to the same buddy cache page. This | ||
1921 | * make sure other parallel operation on the buddy | ||
1922 | * cache doesn't happen whild holding the buddy cache | ||
1923 | * lock | ||
1924 | */ | ||
1925 | int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | ||
1926 | { | ||
1927 | int i; | ||
1928 | int block, pnum; | ||
1929 | int blocks_per_page; | ||
1930 | int groups_per_page; | ||
1931 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
1932 | ext4_group_t first_group; | ||
1933 | struct ext4_group_info *grp; | ||
1934 | |||
1935 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1936 | /* | ||
1937 | * the buddy cache inode stores the block bitmap | ||
1938 | * and buddy information in consecutive blocks. | ||
1939 | * So for each group we need two blocks. | ||
1940 | */ | ||
1941 | block = group * 2; | ||
1942 | pnum = block / blocks_per_page; | ||
1943 | first_group = pnum * blocks_per_page / 2; | ||
1944 | |||
1945 | groups_per_page = blocks_per_page >> 1; | ||
1946 | if (groups_per_page == 0) | ||
1947 | groups_per_page = 1; | ||
1948 | /* read all groups the page covers into the cache */ | ||
1949 | for (i = 0; i < groups_per_page; i++) { | ||
1950 | |||
1951 | if ((first_group + i) >= ngroups) | ||
1952 | break; | ||
1953 | grp = ext4_get_group_info(sb, first_group + i); | ||
1954 | /* take all groups write allocation | ||
1955 | * semaphore. This make sure there is | ||
1956 | * no block allocation going on in any | ||
1957 | * of that groups | ||
1958 | */ | ||
1959 | down_write_nested(&grp->alloc_sem, i); | ||
1960 | } | ||
1961 | return i; | ||
1962 | } | ||
1963 | |||
1964 | void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
1965 | ext4_group_t group, int locked_group) | ||
1966 | { | ||
1967 | int i; | ||
1968 | int block, pnum; | ||
1969 | int blocks_per_page; | ||
1970 | ext4_group_t first_group; | ||
1971 | struct ext4_group_info *grp; | ||
1972 | |||
1973 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1974 | /* | ||
1975 | * the buddy cache inode stores the block bitmap | ||
1976 | * and buddy information in consecutive blocks. | ||
1977 | * So for each group we need two blocks. | ||
1978 | */ | ||
1979 | block = group * 2; | ||
1980 | pnum = block / blocks_per_page; | ||
1981 | first_group = pnum * blocks_per_page / 2; | ||
1982 | /* release locks on all the groups */ | ||
1983 | for (i = 0; i < locked_group; i++) { | ||
1984 | |||
1985 | grp = ext4_get_group_info(sb, first_group + i); | ||
1986 | /* take all groups write allocation | ||
1987 | * semaphore. This make sure there is | ||
1988 | * no block allocation going on in any | ||
1989 | * of that groups | ||
1990 | */ | ||
1991 | up_write(&grp->alloc_sem); | ||
1992 | } | ||
1993 | |||
1994 | } | ||
1995 | |||
1996 | static noinline_for_stack int | 1957 | static noinline_for_stack int |
1997 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 1958 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1998 | { | 1959 | { |
@@ -2233,15 +2194,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = { | |||
2233 | .release = seq_release, | 2194 | .release = seq_release, |
2234 | }; | 2195 | }; |
2235 | 2196 | ||
2197 | static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) | ||
2198 | { | ||
2199 | int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2200 | struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index]; | ||
2201 | |||
2202 | BUG_ON(!cachep); | ||
2203 | return cachep; | ||
2204 | } | ||
2236 | 2205 | ||
2237 | /* Create and initialize ext4_group_info data for the given group. */ | 2206 | /* Create and initialize ext4_group_info data for the given group. */ |
2238 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | 2207 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, |
2239 | struct ext4_group_desc *desc) | 2208 | struct ext4_group_desc *desc) |
2240 | { | 2209 | { |
2241 | int i, len; | 2210 | int i; |
2242 | int metalen = 0; | 2211 | int metalen = 0; |
2243 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2212 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2244 | struct ext4_group_info **meta_group_info; | 2213 | struct ext4_group_info **meta_group_info; |
2214 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2245 | 2215 | ||
2246 | /* | 2216 | /* |
2247 | * First check if this group is the first of a reserved block. | 2217 | * First check if this group is the first of a reserved block. |
@@ -2261,22 +2231,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2261 | meta_group_info; | 2231 | meta_group_info; |
2262 | } | 2232 | } |
2263 | 2233 | ||
2264 | /* | ||
2265 | * calculate needed size. if change bb_counters size, | ||
2266 | * don't forget about ext4_mb_generate_buddy() | ||
2267 | */ | ||
2268 | len = offsetof(typeof(**meta_group_info), | ||
2269 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2270 | |||
2271 | meta_group_info = | 2234 | meta_group_info = |
2272 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; | 2235 | sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; |
2273 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 2236 | i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
2274 | 2237 | ||
2275 | meta_group_info[i] = kzalloc(len, GFP_KERNEL); | 2238 | meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); |
2276 | if (meta_group_info[i] == NULL) { | 2239 | if (meta_group_info[i] == NULL) { |
2277 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); | 2240 | printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); |
2278 | goto exit_group_info; | 2241 | goto exit_group_info; |
2279 | } | 2242 | } |
2243 | memset(meta_group_info[i], 0, kmem_cache_size(cachep)); | ||
2280 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, | 2244 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, |
2281 | &(meta_group_info[i]->bb_state)); | 2245 | &(meta_group_info[i]->bb_state)); |
2282 | 2246 | ||
@@ -2331,6 +2295,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2331 | int num_meta_group_infos_max; | 2295 | int num_meta_group_infos_max; |
2332 | int array_size; | 2296 | int array_size; |
2333 | struct ext4_group_desc *desc; | 2297 | struct ext4_group_desc *desc; |
2298 | struct kmem_cache *cachep; | ||
2334 | 2299 | ||
2335 | /* This is the number of blocks used by GDT */ | 2300 | /* This is the number of blocks used by GDT */ |
2336 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - | 2301 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - |
@@ -2363,7 +2328,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2363 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | 2328 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte |
2364 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | 2329 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. |
2365 | * So a two level scheme suffices for now. */ | 2330 | * So a two level scheme suffices for now. */ |
2366 | sbi->s_group_info = kmalloc(array_size, GFP_KERNEL); | 2331 | sbi->s_group_info = kzalloc(array_size, GFP_KERNEL); |
2367 | if (sbi->s_group_info == NULL) { | 2332 | if (sbi->s_group_info == NULL) { |
2368 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); | 2333 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); |
2369 | return -ENOMEM; | 2334 | return -ENOMEM; |
@@ -2373,6 +2338,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2373 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); | 2338 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); |
2374 | goto err_freesgi; | 2339 | goto err_freesgi; |
2375 | } | 2340 | } |
2341 | sbi->s_buddy_cache->i_ino = get_next_ino(); | ||
2376 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; | 2342 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; |
2377 | for (i = 0; i < ngroups; i++) { | 2343 | for (i = 0; i < ngroups; i++) { |
2378 | desc = ext4_get_group_desc(sb, i, NULL); | 2344 | desc = ext4_get_group_desc(sb, i, NULL); |
@@ -2388,8 +2354,9 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2388 | return 0; | 2354 | return 0; |
2389 | 2355 | ||
2390 | err_freebuddy: | 2356 | err_freebuddy: |
2357 | cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2391 | while (i-- > 0) | 2358 | while (i-- > 0) |
2392 | kfree(ext4_get_group_info(sb, i)); | 2359 | kmem_cache_free(cachep, ext4_get_group_info(sb, i)); |
2393 | i = num_meta_group_infos; | 2360 | i = num_meta_group_infos; |
2394 | while (i-- > 0) | 2361 | while (i-- > 0) |
2395 | kfree(sbi->s_group_info[i]); | 2362 | kfree(sbi->s_group_info[i]); |
@@ -2399,6 +2366,55 @@ err_freesgi: | |||
2399 | return -ENOMEM; | 2366 | return -ENOMEM; |
2400 | } | 2367 | } |
2401 | 2368 | ||
2369 | static void ext4_groupinfo_destroy_slabs(void) | ||
2370 | { | ||
2371 | int i; | ||
2372 | |||
2373 | for (i = 0; i < NR_GRPINFO_CACHES; i++) { | ||
2374 | if (ext4_groupinfo_caches[i]) | ||
2375 | kmem_cache_destroy(ext4_groupinfo_caches[i]); | ||
2376 | ext4_groupinfo_caches[i] = NULL; | ||
2377 | } | ||
2378 | } | ||
2379 | |||
2380 | static int ext4_groupinfo_create_slab(size_t size) | ||
2381 | { | ||
2382 | static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex); | ||
2383 | int slab_size; | ||
2384 | int blocksize_bits = order_base_2(size); | ||
2385 | int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2386 | struct kmem_cache *cachep; | ||
2387 | |||
2388 | if (cache_index >= NR_GRPINFO_CACHES) | ||
2389 | return -EINVAL; | ||
2390 | |||
2391 | if (unlikely(cache_index < 0)) | ||
2392 | cache_index = 0; | ||
2393 | |||
2394 | mutex_lock(&ext4_grpinfo_slab_create_mutex); | ||
2395 | if (ext4_groupinfo_caches[cache_index]) { | ||
2396 | mutex_unlock(&ext4_grpinfo_slab_create_mutex); | ||
2397 | return 0; /* Already created */ | ||
2398 | } | ||
2399 | |||
2400 | slab_size = offsetof(struct ext4_group_info, | ||
2401 | bb_counters[blocksize_bits + 2]); | ||
2402 | |||
2403 | cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index], | ||
2404 | slab_size, 0, SLAB_RECLAIM_ACCOUNT, | ||
2405 | NULL); | ||
2406 | |||
2407 | mutex_unlock(&ext4_grpinfo_slab_create_mutex); | ||
2408 | if (!cachep) { | ||
2409 | printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n"); | ||
2410 | return -ENOMEM; | ||
2411 | } | ||
2412 | |||
2413 | ext4_groupinfo_caches[cache_index] = cachep; | ||
2414 | |||
2415 | return 0; | ||
2416 | } | ||
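The slab size comes from offsetof() over the flexible bb_counters[] tail: orders 0 through blocksize_bits + 1 each need a counter, so larger block sizes need larger group-info objects. A sketch with a simplified stand-in struct (the real struct ext4_group_info carries more fields):

        #include <stdio.h>
        #include <stddef.h>

        /* Stand-in: the flexible bb_counters[] tail holds one counter
         * per buddy order, 0 through blocksize_bits + 1. */
        struct group_info {
                unsigned long bb_state;
                unsigned short bb_counters[];
        };

        int main(void)
        {
                int bits;

                for (bits = 10; bits <= 17; bits++)     /* 1k .. 128k blocks */
                        printf("blocksize_bits=%d -> slab size %zu\n", bits,
                               offsetof(struct group_info,
                                        bb_counters[bits + 2]));
                return 0;
        }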
2417 | |||
2402 | int ext4_mb_init(struct super_block *sb, int needs_recovery) | 2418 | int ext4_mb_init(struct super_block *sb, int needs_recovery) |
2403 | { | 2419 | { |
2404 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2420 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -2411,16 +2427,21 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2411 | 2427 | ||
2412 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2428 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2413 | if (sbi->s_mb_offsets == NULL) { | 2429 | if (sbi->s_mb_offsets == NULL) { |
2414 | return -ENOMEM; | 2430 | ret = -ENOMEM; |
2431 | goto out; | ||
2415 | } | 2432 | } |
2416 | 2433 | ||
2417 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); | 2434 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); |
2418 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2435 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2419 | if (sbi->s_mb_maxs == NULL) { | 2436 | if (sbi->s_mb_maxs == NULL) { |
2420 | kfree(sbi->s_mb_offsets); | 2437 | ret = -ENOMEM; |
2421 | return -ENOMEM; | 2438 | goto out; |
2422 | } | 2439 | } |
2423 | 2440 | ||
2441 | ret = ext4_groupinfo_create_slab(sb->s_blocksize); | ||
2442 | if (ret < 0) | ||
2443 | goto out; | ||
2444 | |||
2424 | /* order 0 is regular bitmap */ | 2445 | /* order 0 is regular bitmap */ |
2425 | sbi->s_mb_maxs[0] = sb->s_blocksize << 3; | 2446 | sbi->s_mb_maxs[0] = sb->s_blocksize << 3; |
2426 | sbi->s_mb_offsets[0] = 0; | 2447 | sbi->s_mb_offsets[0] = 0; |
@@ -2439,9 +2460,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2439 | /* init file for buddy data */ | 2460 | /* init file for buddy data */ |
2440 | ret = ext4_mb_init_backend(sb); | 2461 | ret = ext4_mb_init_backend(sb); |
2441 | if (ret != 0) { | 2462 | if (ret != 0) { |
2442 | kfree(sbi->s_mb_offsets); | 2463 | goto out; |
2443 | kfree(sbi->s_mb_maxs); | ||
2444 | return ret; | ||
2445 | } | 2464 | } |
2446 | 2465 | ||
2447 | spin_lock_init(&sbi->s_md_lock); | 2466 | spin_lock_init(&sbi->s_md_lock); |
@@ -2456,9 +2475,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2456 | 2475 | ||
2457 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); | 2476 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2458 | if (sbi->s_locality_groups == NULL) { | 2477 | if (sbi->s_locality_groups == NULL) { |
2459 | kfree(sbi->s_mb_offsets); | 2478 | ret = -ENOMEM; |
2460 | kfree(sbi->s_mb_maxs); | 2479 | goto out; |
2461 | return -ENOMEM; | ||
2462 | } | 2480 | } |
2463 | for_each_possible_cpu(i) { | 2481 | for_each_possible_cpu(i) { |
2464 | struct ext4_locality_group *lg; | 2482 | struct ext4_locality_group *lg; |
@@ -2475,7 +2493,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2475 | 2493 | ||
2476 | if (sbi->s_journal) | 2494 | if (sbi->s_journal) |
2477 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | 2495 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
2478 | return 0; | 2496 | out: |
2497 | if (ret) { | ||
2498 | kfree(sbi->s_mb_offsets); | ||
2499 | kfree(sbi->s_mb_maxs); | ||
2500 | } | ||
2501 | return ret; | ||
2479 | } | 2502 | } |
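The reworked error handling funnels every failure in ext4_mb_init() through a single out: label that frees s_mb_offsets and s_mb_maxs only when ret is set. The same pattern in miniature, with a hypothetical init_two() helper standing in for the real function:

        #include <stdlib.h>

        /* Every failure site just sets ret and jumps to 'out', which
         * frees whatever was set (free(NULL) is a no-op). */
        static int init_two(char **a, char **b)
        {
                int ret = 0;

                *a = *b = NULL;
                *a = malloc(16);
                if (*a == NULL) {
                        ret = -1;
                        goto out;
                }
                *b = malloc(16);
                if (*b == NULL) {
                        ret = -1;
                        goto out;
                }
        out:
                if (ret) {
                        free(*a);
                        free(*b);
                        *a = *b = NULL;
                }
                return ret;
        }

        int main(void)
        {
                char *a, *b;
                int ret = init_two(&a, &b);

                free(a);        /* both NULL on failure */
                free(b);
                return ret ? 1 : 0;
        }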
2480 | 2503 | ||
2481 | /* needs to be called with the ext4 group lock held */ | 2504 | /* needs to be called with the ext4 group lock held */ |
@@ -2503,6 +2526,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2503 | int num_meta_group_infos; | 2526 | int num_meta_group_infos; |
2504 | struct ext4_group_info *grinfo; | 2527 | struct ext4_group_info *grinfo; |
2505 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2528 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2529 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | ||
2506 | 2530 | ||
2507 | if (sbi->s_group_info) { | 2531 | if (sbi->s_group_info) { |
2508 | for (i = 0; i < ngroups; i++) { | 2532 | for (i = 0; i < ngroups; i++) { |
@@ -2513,7 +2537,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2513 | ext4_lock_group(sb, i); | 2537 | ext4_lock_group(sb, i); |
2514 | ext4_mb_cleanup_pa(grinfo); | 2538 | ext4_mb_cleanup_pa(grinfo); |
2515 | ext4_unlock_group(sb, i); | 2539 | ext4_unlock_group(sb, i); |
2516 | kfree(grinfo); | 2540 | kmem_cache_free(cachep, grinfo); |
2517 | } | 2541 | } |
2518 | num_meta_group_infos = (ngroups + | 2542 | num_meta_group_infos = (ngroups + |
2519 | EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2543 | EXT4_DESC_PER_BLOCK(sb) - 1) >> |
@@ -2557,20 +2581,15 @@ int ext4_mb_release(struct super_block *sb) | |||
2557 | return 0; | 2581 | return 0; |
2558 | } | 2582 | } |
2559 | 2583 | ||
2560 | static inline void ext4_issue_discard(struct super_block *sb, | 2584 | static inline int ext4_issue_discard(struct super_block *sb, |
2561 | ext4_group_t block_group, ext4_grpblk_t block, int count) | 2585 | ext4_group_t block_group, ext4_grpblk_t block, int count) |
2562 | { | 2586 | { |
2563 | int ret; | ||
2564 | ext4_fsblk_t discard_block; | 2587 | ext4_fsblk_t discard_block; |
2565 | 2588 | ||
2566 | discard_block = block + ext4_group_first_block_no(sb, block_group); | 2589 | discard_block = block + ext4_group_first_block_no(sb, block_group); |
2567 | trace_ext4_discard_blocks(sb, | 2590 | trace_ext4_discard_blocks(sb, |
2568 | (unsigned long long) discard_block, count); | 2591 | (unsigned long long) discard_block, count); |
2569 | ret = sb_issue_discard(sb, discard_block, count); | 2592 | return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); |
2570 | if (ret == EOPNOTSUPP) { | ||
2571 | ext4_warning(sb, "discard not supported, disabling"); | ||
2572 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | ||
2573 | } | ||
2574 | } | 2593 | } |
2575 | 2594 | ||
2576 | /* | 2595 | /* |
@@ -2594,7 +2613,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2594 | 2613 | ||
2595 | if (test_opt(sb, DISCARD)) | 2614 | if (test_opt(sb, DISCARD)) |
2596 | ext4_issue_discard(sb, entry->group, | 2615 | ext4_issue_discard(sb, entry->group, |
2597 | entry->start_blk, entry->count); | 2616 | entry->start_blk, entry->count); |
2598 | 2617 | ||
2599 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2618 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2600 | /* we expect to find existing buddy because it's pinned */ | 2619 | /* we expect to find existing buddy because it's pinned */ |
@@ -2658,28 +2677,22 @@ static void ext4_remove_debugfs_entry(void) | |||
2658 | 2677 | ||
2659 | #endif | 2678 | #endif |
2660 | 2679 | ||
2661 | int __init init_ext4_mballoc(void) | 2680 | int __init ext4_init_mballoc(void) |
2662 | { | 2681 | { |
2663 | ext4_pspace_cachep = | 2682 | ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, |
2664 | kmem_cache_create("ext4_prealloc_space", | 2683 | SLAB_RECLAIM_ACCOUNT); |
2665 | sizeof(struct ext4_prealloc_space), | ||
2666 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2667 | if (ext4_pspace_cachep == NULL) | 2684 | if (ext4_pspace_cachep == NULL) |
2668 | return -ENOMEM; | 2685 | return -ENOMEM; |
2669 | 2686 | ||
2670 | ext4_ac_cachep = | 2687 | ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context, |
2671 | kmem_cache_create("ext4_alloc_context", | 2688 | SLAB_RECLAIM_ACCOUNT); |
2672 | sizeof(struct ext4_allocation_context), | ||
2673 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2674 | if (ext4_ac_cachep == NULL) { | 2689 | if (ext4_ac_cachep == NULL) { |
2675 | kmem_cache_destroy(ext4_pspace_cachep); | 2690 | kmem_cache_destroy(ext4_pspace_cachep); |
2676 | return -ENOMEM; | 2691 | return -ENOMEM; |
2677 | } | 2692 | } |
2678 | 2693 | ||
2679 | ext4_free_ext_cachep = | 2694 | ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, |
2680 | kmem_cache_create("ext4_free_block_extents", | 2695 | SLAB_RECLAIM_ACCOUNT); |
2681 | sizeof(struct ext4_free_data), | ||
2682 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2683 | if (ext4_free_ext_cachep == NULL) { | 2696 | if (ext4_free_ext_cachep == NULL) { |
2684 | kmem_cache_destroy(ext4_pspace_cachep); | 2697 | kmem_cache_destroy(ext4_pspace_cachep); |
2685 | kmem_cache_destroy(ext4_ac_cachep); | 2698 | kmem_cache_destroy(ext4_ac_cachep); |
@@ -2689,7 +2702,7 @@ int __init init_ext4_mballoc(void) | |||
2689 | return 0; | 2702 | return 0; |
2690 | } | 2703 | } |
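The KMEM_CACHE() macro used above derives the cache name, object size, and alignment from the struct itself (roughly kmem_cache_create(#__struct, sizeof(struct __struct), __alignof__(struct __struct), flags, NULL), per include/linux/slab.h of this era), so the first conversion is equivalent to:

        /* Expanded form of the first KMEM_CACHE() call above. Note the
         * one behavioral nuance: the old open-coded call passed 0 for
         * the alignment, while KMEM_CACHE() passes __alignof__(). */
        ext4_pspace_cachep =
                kmem_cache_create("ext4_prealloc_space",
                                  sizeof(struct ext4_prealloc_space),
                                  __alignof__(struct ext4_prealloc_space),
                                  SLAB_RECLAIM_ACCOUNT, NULL);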
2691 | 2704 | ||
2692 | void exit_ext4_mballoc(void) | 2705 | void ext4_exit_mballoc(void) |
2693 | { | 2706 | { |
2694 | /* | 2707 | /* |
2695 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep | 2708 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep |
@@ -2699,6 +2712,7 @@ void exit_ext4_mballoc(void) | |||
2699 | kmem_cache_destroy(ext4_pspace_cachep); | 2712 | kmem_cache_destroy(ext4_pspace_cachep); |
2700 | kmem_cache_destroy(ext4_ac_cachep); | 2713 | kmem_cache_destroy(ext4_ac_cachep); |
2701 | kmem_cache_destroy(ext4_free_ext_cachep); | 2714 | kmem_cache_destroy(ext4_free_ext_cachep); |
2715 | ext4_groupinfo_destroy_slabs(); | ||
2702 | ext4_remove_debugfs_entry(); | 2716 | ext4_remove_debugfs_entry(); |
2703 | } | 2717 | } |
2704 | 2718 | ||
@@ -3135,7 +3149,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | |||
3135 | cur_distance = abs(goal_block - cpa->pa_pstart); | 3149 | cur_distance = abs(goal_block - cpa->pa_pstart); |
3136 | new_distance = abs(goal_block - pa->pa_pstart); | 3150 | new_distance = abs(goal_block - pa->pa_pstart); |
3137 | 3151 | ||
3138 | if (cur_distance < new_distance) | 3152 | if (cur_distance <= new_distance) |
3139 | return cpa; | 3153 | return cpa; |
3140 | 3154 | ||
3141 | /* drop the previous reference */ | 3155 | /* drop the previous reference */ |
@@ -3535,8 +3549,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) | |||
3535 | */ | 3549 | */ |
3536 | static noinline_for_stack int | 3550 | static noinline_for_stack int |
3537 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | 3551 | ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, |
3538 | struct ext4_prealloc_space *pa, | 3552 | struct ext4_prealloc_space *pa) |
3539 | struct ext4_allocation_context *ac) | ||
3540 | { | 3553 | { |
3541 | struct super_block *sb = e4b->bd_sb; | 3554 | struct super_block *sb = e4b->bd_sb; |
3542 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3555 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -3554,11 +3567,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3554 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3567 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3555 | end = bit + pa->pa_len; | 3568 | end = bit + pa->pa_len; |
3556 | 3569 | ||
3557 | if (ac) { | ||
3558 | ac->ac_sb = sb; | ||
3559 | ac->ac_inode = pa->pa_inode; | ||
3560 | } | ||
3561 | |||
3562 | while (bit < end) { | 3570 | while (bit < end) { |
3563 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); | 3571 | bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); |
3564 | if (bit >= end) | 3572 | if (bit >= end) |
@@ -3569,15 +3577,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3569 | (unsigned) next - bit, (unsigned) group); | 3577 | (unsigned) next - bit, (unsigned) group); |
3570 | free += next - bit; | 3578 | free += next - bit; |
3571 | 3579 | ||
3572 | if (ac) { | 3580 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); |
3573 | ac->ac_b_ex.fe_group = group; | 3581 | trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit, |
3574 | ac->ac_b_ex.fe_start = bit; | ||
3575 | ac->ac_b_ex.fe_len = next - bit; | ||
3576 | ac->ac_b_ex.fe_logical = 0; | ||
3577 | trace_ext4_mballoc_discard(ac); | ||
3578 | } | ||
3579 | |||
3580 | trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit, | ||
3581 | next - bit); | 3582 | next - bit); |
3582 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3583 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3583 | bit = next + 1; | 3584 | bit = next + 1; |
@@ -3601,29 +3602,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3601 | 3602 | ||
3602 | static noinline_for_stack int | 3603 | static noinline_for_stack int |
3603 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, | 3604 | ext4_mb_release_group_pa(struct ext4_buddy *e4b, |
3604 | struct ext4_prealloc_space *pa, | 3605 | struct ext4_prealloc_space *pa) |
3605 | struct ext4_allocation_context *ac) | ||
3606 | { | 3606 | { |
3607 | struct super_block *sb = e4b->bd_sb; | 3607 | struct super_block *sb = e4b->bd_sb; |
3608 | ext4_group_t group; | 3608 | ext4_group_t group; |
3609 | ext4_grpblk_t bit; | 3609 | ext4_grpblk_t bit; |
3610 | 3610 | ||
3611 | trace_ext4_mb_release_group_pa(sb, ac, pa); | 3611 | trace_ext4_mb_release_group_pa(pa); |
3612 | BUG_ON(pa->pa_deleted == 0); | 3612 | BUG_ON(pa->pa_deleted == 0); |
3613 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3613 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3614 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3614 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3615 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); | 3615 | mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); |
3616 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); | 3616 | atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); |
3617 | 3617 | trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); | |
3618 | if (ac) { | ||
3619 | ac->ac_sb = sb; | ||
3620 | ac->ac_inode = NULL; | ||
3621 | ac->ac_b_ex.fe_group = group; | ||
3622 | ac->ac_b_ex.fe_start = bit; | ||
3623 | ac->ac_b_ex.fe_len = pa->pa_len; | ||
3624 | ac->ac_b_ex.fe_logical = 0; | ||
3625 | trace_ext4_mballoc_discard(ac); | ||
3626 | } | ||
3627 | 3618 | ||
3628 | return 0; | 3619 | return 0; |
3629 | } | 3620 | } |
@@ -3644,7 +3635,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3644 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 3635 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
3645 | struct buffer_head *bitmap_bh = NULL; | 3636 | struct buffer_head *bitmap_bh = NULL; |
3646 | struct ext4_prealloc_space *pa, *tmp; | 3637 | struct ext4_prealloc_space *pa, *tmp; |
3647 | struct ext4_allocation_context *ac; | ||
3648 | struct list_head list; | 3638 | struct list_head list; |
3649 | struct ext4_buddy e4b; | 3639 | struct ext4_buddy e4b; |
3650 | int err; | 3640 | int err; |
@@ -3673,9 +3663,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3673 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; | 3663 | needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; |
3674 | 3664 | ||
3675 | INIT_LIST_HEAD(&list); | 3665 | INIT_LIST_HEAD(&list); |
3676 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3677 | if (ac) | ||
3678 | ac->ac_sb = sb; | ||
3679 | repeat: | 3666 | repeat: |
3680 | ext4_lock_group(sb, group); | 3667 | ext4_lock_group(sb, group); |
3681 | list_for_each_entry_safe(pa, tmp, | 3668 | list_for_each_entry_safe(pa, tmp, |
@@ -3730,9 +3717,9 @@ repeat: | |||
3730 | spin_unlock(pa->pa_obj_lock); | 3717 | spin_unlock(pa->pa_obj_lock); |
3731 | 3718 | ||
3732 | if (pa->pa_type == MB_GROUP_PA) | 3719 | if (pa->pa_type == MB_GROUP_PA) |
3733 | ext4_mb_release_group_pa(&e4b, pa, ac); | 3720 | ext4_mb_release_group_pa(&e4b, pa); |
3734 | else | 3721 | else |
3735 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3722 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3736 | 3723 | ||
3737 | list_del(&pa->u.pa_tmp_list); | 3724 | list_del(&pa->u.pa_tmp_list); |
3738 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3725 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
@@ -3740,8 +3727,6 @@ repeat: | |||
3740 | 3727 | ||
3741 | out: | 3728 | out: |
3742 | ext4_unlock_group(sb, group); | 3729 | ext4_unlock_group(sb, group); |
3743 | if (ac) | ||
3744 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3745 | ext4_mb_unload_buddy(&e4b); | 3730 | ext4_mb_unload_buddy(&e4b); |
3746 | put_bh(bitmap_bh); | 3731 | put_bh(bitmap_bh); |
3747 | return free; | 3732 | return free; |
@@ -3762,7 +3747,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3762 | struct super_block *sb = inode->i_sb; | 3747 | struct super_block *sb = inode->i_sb; |
3763 | struct buffer_head *bitmap_bh = NULL; | 3748 | struct buffer_head *bitmap_bh = NULL; |
3764 | struct ext4_prealloc_space *pa, *tmp; | 3749 | struct ext4_prealloc_space *pa, *tmp; |
3765 | struct ext4_allocation_context *ac; | ||
3766 | ext4_group_t group = 0; | 3750 | ext4_group_t group = 0; |
3767 | struct list_head list; | 3751 | struct list_head list; |
3768 | struct ext4_buddy e4b; | 3752 | struct ext4_buddy e4b; |
@@ -3778,11 +3762,6 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3778 | 3762 | ||
3779 | INIT_LIST_HEAD(&list); | 3763 | INIT_LIST_HEAD(&list); |
3780 | 3764 | ||
3781 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
3782 | if (ac) { | ||
3783 | ac->ac_sb = sb; | ||
3784 | ac->ac_inode = inode; | ||
3785 | } | ||
3786 | repeat: | 3765 | repeat: |
3787 | /* first, collect all pa's in the inode */ | 3766 | /* first, collect all pa's in the inode */ |
3788 | spin_lock(&ei->i_prealloc_lock); | 3767 | spin_lock(&ei->i_prealloc_lock); |
@@ -3852,7 +3831,7 @@ repeat: | |||
3852 | 3831 | ||
3853 | ext4_lock_group(sb, group); | 3832 | ext4_lock_group(sb, group); |
3854 | list_del(&pa->pa_group_list); | 3833 | list_del(&pa->pa_group_list); |
3855 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3834 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); |
3856 | ext4_unlock_group(sb, group); | 3835 | ext4_unlock_group(sb, group); |
3857 | 3836 | ||
3858 | ext4_mb_unload_buddy(&e4b); | 3837 | ext4_mb_unload_buddy(&e4b); |
@@ -3861,30 +3840,16 @@ repeat: | |||
3861 | list_del(&pa->u.pa_tmp_list); | 3840 | list_del(&pa->u.pa_tmp_list); |
3862 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 3841 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
3863 | } | 3842 | } |
3864 | if (ac) | ||
3865 | kmem_cache_free(ext4_ac_cachep, ac); | ||
3866 | } | 3843 | } |
3867 | 3844 | ||
3868 | /* | ||
3869 | * finds all preallocated spaces and return blocks being freed to them | ||
3870 | * if preallocated space becomes full (no block is used from the space) | ||
3871 | * then the function frees space in buddy | ||
3872 | * XXX: at the moment, truncate (which is the only way to free blocks) | ||
3873 | * discards all preallocations | ||
3874 | */ | ||
3875 | static void ext4_mb_return_to_preallocation(struct inode *inode, | ||
3876 | struct ext4_buddy *e4b, | ||
3877 | sector_t block, int count) | ||
3878 | { | ||
3879 | BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); | ||
3880 | } | ||
3881 | #ifdef CONFIG_EXT4_DEBUG | 3845 | #ifdef CONFIG_EXT4_DEBUG |
3882 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | 3846 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) |
3883 | { | 3847 | { |
3884 | struct super_block *sb = ac->ac_sb; | 3848 | struct super_block *sb = ac->ac_sb; |
3885 | ext4_group_t ngroups, i; | 3849 | ext4_group_t ngroups, i; |
3886 | 3850 | ||
3887 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | 3851 | if (!mb_enable_debug || |
3852 | (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) | ||
3888 | return; | 3853 | return; |
3889 | 3854 | ||
3890 | printk(KERN_ERR "EXT4-fs: Can't allocate:" | 3855 | printk(KERN_ERR "EXT4-fs: Can't allocate:" |
@@ -4060,14 +4025,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4060 | struct ext4_buddy e4b; | 4025 | struct ext4_buddy e4b; |
4061 | struct list_head discard_list; | 4026 | struct list_head discard_list; |
4062 | struct ext4_prealloc_space *pa, *tmp; | 4027 | struct ext4_prealloc_space *pa, *tmp; |
4063 | struct ext4_allocation_context *ac; | ||
4064 | 4028 | ||
4065 | mb_debug(1, "discard locality group preallocation\n"); | 4029 | mb_debug(1, "discard locality group preallocation\n"); |
4066 | 4030 | ||
4067 | INIT_LIST_HEAD(&discard_list); | 4031 | INIT_LIST_HEAD(&discard_list); |
4068 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4069 | if (ac) | ||
4070 | ac->ac_sb = sb; | ||
4071 | 4032 | ||
4072 | spin_lock(&lg->lg_prealloc_lock); | 4033 | spin_lock(&lg->lg_prealloc_lock); |
4073 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], | 4034 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], |
@@ -4119,15 +4080,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4119 | } | 4080 | } |
4120 | ext4_lock_group(sb, group); | 4081 | ext4_lock_group(sb, group); |
4121 | list_del(&pa->pa_group_list); | 4082 | list_del(&pa->pa_group_list); |
4122 | ext4_mb_release_group_pa(&e4b, pa, ac); | 4083 | ext4_mb_release_group_pa(&e4b, pa); |
4123 | ext4_unlock_group(sb, group); | 4084 | ext4_unlock_group(sb, group); |
4124 | 4085 | ||
4125 | ext4_mb_unload_buddy(&e4b); | 4086 | ext4_mb_unload_buddy(&e4b); |
4126 | list_del(&pa->u.pa_tmp_list); | 4087 | list_del(&pa->u.pa_tmp_list); |
4127 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 4088 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
4128 | } | 4089 | } |
4129 | if (ac) | ||
4130 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4131 | } | 4090 | } |
4132 | 4091 | ||
4133 | /* | 4092 | /* |
@@ -4203,15 +4162,12 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) | |||
4203 | spin_unlock(&pa->pa_lock); | 4162 | spin_unlock(&pa->pa_lock); |
4204 | } | 4163 | } |
4205 | } | 4164 | } |
4206 | if (ac->alloc_semp) | ||
4207 | up_read(ac->alloc_semp); | ||
4208 | if (pa) { | 4165 | if (pa) { |
4209 | /* | 4166 | /* |
4210 | * We want to add the pa to the right bucket. | 4167 | * We want to add the pa to the right bucket. |
4211 | * Remove it from the list and while adding | 4168 | * Remove it from the list and while adding |
4212 | * make sure the list to which we are adding | 4169 | * make sure the list to which we are adding |
4213 | * doesn't grow big. We need to release | 4170 | * doesn't grow big. |
4214 | * alloc_semp before calling ext4_mb_add_n_trim() | ||
4215 | */ | 4171 | */ |
4216 | if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { | 4172 | if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { |
4217 | spin_lock(pa->pa_obj_lock); | 4173 | spin_lock(pa->pa_obj_lock); |
@@ -4273,14 +4229,16 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4273 | * EDQUOT check, as blocks and quotas have been already | 4229 | * EDQUOT check, as blocks and quotas have been already |
4274 | * reserved when data being copied into pagecache. | 4230 | * reserved when data being copied into pagecache. |
4275 | */ | 4231 | */ |
4276 | if (EXT4_I(ar->inode)->i_delalloc_reserved_flag) | 4232 | if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) |
4277 | ar->flags |= EXT4_MB_DELALLOC_RESERVED; | 4233 | ar->flags |= EXT4_MB_DELALLOC_RESERVED; |
4278 | else { | 4234 | else { |
4279 | /* Without delayed allocation we need to verify | 4235 | /* Without delayed allocation we need to verify |
4280 | * there are enough free blocks to do block allocation | 4236 | * there are enough free blocks to do block allocation |
4281 | * and verify allocation doesn't exceed the quota limits. | 4237 | * and verify allocation doesn't exceed the quota limits. |
4282 | */ | 4238 | */ |
4283 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { | 4239 | while (ar->len && |
4240 | ext4_claim_free_blocks(sbi, ar->len, ar->flags)) { | ||
4241 | |||
4284 | /* let others free the space */ | 4242 | /* let others free the space */ |
4285 | yield(); | 4243 | yield(); |
4286 | ar->len = ar->len >> 1; | 4244 | ar->len = ar->len >> 1; |
@@ -4290,9 +4248,15 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4290 | return 0; | 4248 | return 0; |
4291 | } | 4249 | } |
4292 | reserv_blks = ar->len; | 4250 | reserv_blks = ar->len; |
4293 | while (ar->len && dquot_alloc_block(ar->inode, ar->len)) { | 4251 | if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { |
4294 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4252 | dquot_alloc_block_nofail(ar->inode, ar->len); |
4295 | ar->len--; | 4253 | } else { |
4254 | while (ar->len && | ||
4255 | dquot_alloc_block(ar->inode, ar->len)) { | ||
4256 | |||
4257 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | ||
4258 | ar->len--; | ||
4259 | } | ||
4296 | } | 4260 | } |
4297 | inquota = ar->len; | 4261 | inquota = ar->len; |
4298 | if (ar->len == 0) { | 4262 | if (ar->len == 0) { |
@@ -4370,7 +4334,8 @@ out: | |||
4370 | if (inquota && ar->len < inquota) | 4334 | if (inquota && ar->len < inquota) |
4371 | dquot_free_block(ar->inode, inquota - ar->len); | 4335 | dquot_free_block(ar->inode, inquota - ar->len); |
4372 | if (!ar->len) { | 4336 | if (!ar->len) { |
4373 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) | 4337 | if (!ext4_test_inode_state(ar->inode, |
4338 | EXT4_STATE_DELALLOC_RESERVED)) | ||
4374 | /* release all the reserved blocks if non delalloc */ | 4339 | /* release all the reserved blocks if non delalloc */ |
4375 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 4340 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
4376 | reserv_blks); | 4341 | reserv_blks); |
@@ -4483,7 +4448,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4483 | * @inode: inode | 4448 | * @inode: inode |
4484 | * @block: start physical block to free | 4449 | * @block: start physical block to free |
4485 | * @count: number of blocks to count | 4450 | * @count: number of blocks to count |
4486 | * @metadata: Are these metadata blocks | 4451 | * @flags: flags used by ext4_free_blocks |
4487 | */ | 4452 | */ |
4488 | void ext4_free_blocks(handle_t *handle, struct inode *inode, | 4453 | void ext4_free_blocks(handle_t *handle, struct inode *inode, |
4489 | struct buffer_head *bh, ext4_fsblk_t block, | 4454 | struct buffer_head *bh, ext4_fsblk_t block, |
@@ -4491,7 +4456,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4491 | { | 4456 | { |
4492 | struct buffer_head *bitmap_bh = NULL; | 4457 | struct buffer_head *bitmap_bh = NULL; |
4493 | struct super_block *sb = inode->i_sb; | 4458 | struct super_block *sb = inode->i_sb; |
4494 | struct ext4_allocation_context *ac = NULL; | ||
4495 | struct ext4_group_desc *gdp; | 4459 | struct ext4_group_desc *gdp; |
4496 | unsigned long freed = 0; | 4460 | unsigned long freed = 0; |
4497 | unsigned int overflow; | 4461 | unsigned int overflow; |
@@ -4531,6 +4495,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4531 | if (!bh) | 4495 | if (!bh) |
4532 | tbh = sb_find_get_block(inode->i_sb, | 4496 | tbh = sb_find_get_block(inode->i_sb, |
4533 | block + i); | 4497 | block + i); |
4498 | if (unlikely(!tbh)) | ||
4499 | continue; | ||
4534 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4500 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
4535 | inode, tbh, block + i); | 4501 | inode, tbh, block + i); |
4536 | } | 4502 | } |
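The NULL check added here matters because sb_find_get_block() only looks the buffer up in the page cache and never creates one; if the block was never read in, there is nothing to forget for it. The resulting loop, condensed (names as in the patch):

	for (i = 0; i < count; i++) {
		struct buffer_head *tbh = bh;

		if (!bh)		/* no caller-supplied buffer */
			tbh = sb_find_get_block(inode->i_sb, block + i);
		if (unlikely(!tbh))	/* not cached: nothing to forget */
			continue;
		ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
			    inode, tbh, block + i);
	}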
@@ -4546,12 +4512,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4546 | if (!ext4_should_writeback_data(inode)) | 4512 | if (!ext4_should_writeback_data(inode)) |
4547 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4513 | flags |= EXT4_FREE_BLOCKS_METADATA; |
4548 | 4514 | ||
4549 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | ||
4550 | if (ac) { | ||
4551 | ac->ac_inode = inode; | ||
4552 | ac->ac_sb = sb; | ||
4553 | } | ||
4554 | |||
4555 | do_more: | 4515 | do_more: |
4556 | overflow = 0; | 4516 | overflow = 0; |
4557 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 4517 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
@@ -4609,12 +4569,7 @@ do_more: | |||
4609 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | 4569 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); |
4610 | } | 4570 | } |
4611 | #endif | 4571 | #endif |
4612 | if (ac) { | 4572 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count); |
4613 | ac->ac_b_ex.fe_group = block_group; | ||
4614 | ac->ac_b_ex.fe_start = bit; | ||
4615 | ac->ac_b_ex.fe_len = count; | ||
4616 | trace_ext4_mballoc_free(ac); | ||
4617 | } | ||
4618 | 4573 | ||
4619 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | 4574 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
4620 | if (err) | 4575 | if (err) |
@@ -4626,7 +4581,11 @@ do_more: | |||
4626 | * blocks being freed are metadata. these blocks shouldn't | 4581 | * blocks being freed are metadata. these blocks shouldn't |
4627 | * be used until this transaction is committed | 4582 | * be used until this transaction is committed |
4628 | */ | 4583 | */ |
4629 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | 4584 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); |
4585 | if (!new_entry) { | ||
4586 | err = -ENOMEM; | ||
4587 | goto error_return; | ||
4588 | } | ||
4630 | new_entry->start_blk = bit; | 4589 | new_entry->start_blk = bit; |
4631 | new_entry->group = block_group; | 4590 | new_entry->group = block_group; |
4632 | new_entry->count = count; | 4591 | new_entry->count = count; |
@@ -4643,9 +4602,6 @@ do_more: | |||
4643 | ext4_lock_group(sb, block_group); | 4602 | ext4_lock_group(sb, block_group); |
4644 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4603 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4645 | mb_free_blocks(inode, &e4b, bit, count); | 4604 | mb_free_blocks(inode, &e4b, bit, count); |
4646 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | ||
4647 | if (test_opt(sb, DISCARD)) | ||
4648 | ext4_issue_discard(sb, block_group, bit, count); | ||
4649 | } | 4605 | } |
4650 | 4606 | ||
4651 | ret = ext4_free_blks_count(sb, gdp) + count; | 4607 | ret = ext4_free_blks_count(sb, gdp) + count; |
@@ -4685,7 +4641,316 @@ error_return: | |||
4685 | dquot_free_block(inode, freed); | 4641 | dquot_free_block(inode, freed); |
4686 | brelse(bitmap_bh); | 4642 | brelse(bitmap_bh); |
4687 | ext4_std_error(sb, err); | 4643 | ext4_std_error(sb, err); |
4688 | if (ac) | ||
4689 | kmem_cache_free(ext4_ac_cachep, ac); | ||
4690 | return; | 4644 | return; |
4691 | } | 4645 | } |
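With the @flags parameter in place, a typical metadata-freeing call site (a sketch modeled on the extent-tree code; not part of this patch) passes both the METADATA and FORGET hints:

	/* free one extent-tree block; forget any cached buffer for it */
	ext4_free_blocks(handle, inode, NULL, blocknr, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);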
4646 | |||
4647 | /** | ||
4648 | * ext4_add_groupblocks() -- Add given blocks to an existing group | ||
4649 | * @handle: handle to this transaction | ||
4650 | * @sb: super block | ||
4651 | * @block: start physical block to add to the block group | ||
4652 | * @count: number of blocks to add | ||
4653 | * | ||
4654 | * This marks the blocks as free in the bitmap and buddy. | ||
4655 | */ | ||
4656 | void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | ||
4657 | ext4_fsblk_t block, unsigned long count) | ||
4658 | { | ||
4659 | struct buffer_head *bitmap_bh = NULL; | ||
4660 | struct buffer_head *gd_bh; | ||
4661 | ext4_group_t block_group; | ||
4662 | ext4_grpblk_t bit; | ||
4663 | unsigned int i; | ||
4664 | struct ext4_group_desc *desc; | ||
4665 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4666 | struct ext4_buddy e4b; | ||
4667 | int err = 0, ret, blk_free_count; | ||
4668 | ext4_grpblk_t blocks_freed; | ||
4669 | struct ext4_group_info *grp; | ||
4670 | |||
4671 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); | ||
4672 | |||
4673 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | ||
4674 | grp = ext4_get_group_info(sb, block_group); | ||
4675 | /* | ||
4676 | * Check to see if we are adding blocks across a group | ||
4677 | * boundary. | ||
4678 | */ | ||
4679 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) | ||
4680 | goto error_return; | ||
4681 | |||
4682 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | ||
4683 | if (!bitmap_bh) | ||
4684 | goto error_return; | ||
4685 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); | ||
4686 | if (!desc) | ||
4687 | goto error_return; | ||
4688 | |||
4689 | if (in_range(ext4_block_bitmap(sb, desc), block, count) || | ||
4690 | in_range(ext4_inode_bitmap(sb, desc), block, count) || | ||
4691 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | ||
4692 | in_range(block + count - 1, ext4_inode_table(sb, desc), | ||
4693 | sbi->s_itb_per_group)) { | ||
4694 | ext4_error(sb, "Adding blocks in system zones - " | ||
4695 | "Block = %llu, count = %lu", | ||
4696 | block, count); | ||
4697 | goto error_return; | ||
4698 | } | ||
4699 | |||
4700 | BUFFER_TRACE(bitmap_bh, "getting write access"); | ||
4701 | err = ext4_journal_get_write_access(handle, bitmap_bh); | ||
4702 | if (err) | ||
4703 | goto error_return; | ||
4704 | |||
4705 | /* | ||
4706 | * We are about to modify some metadata. Call the journal APIs | ||
4707 | * to unshare ->b_data if a currently-committing transaction is | ||
4708 | * using it | ||
4709 | */ | ||
4710 | BUFFER_TRACE(gd_bh, "get_write_access"); | ||
4711 | err = ext4_journal_get_write_access(handle, gd_bh); | ||
4712 | if (err) | ||
4713 | goto error_return; | ||
4714 | |||
4715 | for (i = 0, blocks_freed = 0; i < count; i++) { | ||
4716 | BUFFER_TRACE(bitmap_bh, "clear bit"); | ||
4717 | if (!mb_test_bit(bit + i, bitmap_bh->b_data)) { | ||
4718 | ext4_error(sb, "bit already cleared for block %llu", | ||
4719 | (ext4_fsblk_t)(block + i)); | ||
4720 | BUFFER_TRACE(bitmap_bh, "bit already cleared"); | ||
4721 | } else { | ||
4722 | blocks_freed++; | ||
4723 | } | ||
4724 | } | ||
4725 | |||
4726 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | ||
4727 | if (err) | ||
4728 | goto error_return; | ||
4729 | |||
4730 | /* | ||
4731 | * We need to update group_info->bb_free and the bitmap | ||
4732 | * with the group lock held; generate_buddy looks at | ||
4733 | * them under the same lock. | ||
4734 | */ | ||
4735 | ext4_lock_group(sb, block_group); | ||
4736 | mb_clear_bits(bitmap_bh->b_data, bit, count); | ||
4737 | mb_free_blocks(NULL, &e4b, bit, count); | ||
4738 | blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); | ||
4739 | ext4_free_blks_set(sb, desc, blk_free_count); | ||
4740 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); | ||
4741 | ext4_unlock_group(sb, block_group); | ||
4742 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); | ||
4743 | |||
4744 | if (sbi->s_log_groups_per_flex) { | ||
4745 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | ||
4746 | atomic_add(blocks_freed, | ||
4747 | &sbi->s_flex_groups[flex_group].free_blocks); | ||
4748 | } | ||
4749 | |||
4750 | ext4_mb_unload_buddy(&e4b); | ||
4751 | |||
4752 | /* We dirtied the bitmap block */ | ||
4753 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | ||
4754 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | ||
4755 | |||
4756 | /* And the group descriptor block */ | ||
4757 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); | ||
4758 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); | ||
4759 | if (!err) | ||
4760 | err = ret; | ||
4761 | |||
4762 | error_return: | ||
4763 | brelse(bitmap_bh); | ||
4764 | ext4_std_error(sb, err); | ||
4765 | return; | ||
4766 | } | ||
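A hedged sketch of how a caller such as the online-resize path might drive ext4_add_groupblocks() when the last group grows; o_blocks_count (the old filesystem size in blocks) and add (the number of new blocks) are assumptions here, and the real call site in resize.c may differ in detail:

	handle_t *handle;
	int err;

	/* one block bitmap, one group descriptor, one superblock */
	handle = ext4_journal_start_sb(sb, 3);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	ext4_add_groupblocks(handle, sb, o_blocks_count, add);
	err = ext4_journal_stop(handle);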
4767 | |||
4768 | /** | ||
4769 | * ext4_trim_extent -- function to TRIM one single free extent in the group | ||
4770 | * @sb: super block for the file system | ||
4771 | * @start: starting block of the free extent in the alloc. group | ||
4772 | * @count: number of blocks to TRIM | ||
4773 | * @group: alloc. group we are working with | ||
4774 | * @e4b: ext4 buddy for the group | ||
4775 | * | ||
4776 | * Trim "count" blocks starting at "start" in the "group". To assure that no | ||
4777 | * one will allocate those blocks, mark it as used in buddy bitmap. This must | ||
4778 | * be called with under the group lock. | ||
4779 | */ | ||
4780 | static void ext4_trim_extent(struct super_block *sb, int start, int count, | ||
4781 | ext4_group_t group, struct ext4_buddy *e4b) | ||
4782 | { | ||
4783 | struct ext4_free_extent ex; | ||
4784 | |||
4785 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); | ||
4786 | |||
4787 | ex.fe_start = start; | ||
4788 | ex.fe_group = group; | ||
4789 | ex.fe_len = count; | ||
4790 | |||
4791 | /* | ||
4792 | * Mark the blocks used, so no one can reuse them while being | ||
4793 | * trimmed; the lock is dropped around the discard, which may block. | ||
4794 | */ | ||
4795 | mb_mark_used(e4b, &ex); | ||
4796 | ext4_unlock_group(sb, group); | ||
4797 | ext4_issue_discard(sb, group, start, count); | ||
4798 | ext4_lock_group(sb, group); | ||
4799 | mb_free_blocks(NULL, e4b, start, ex.fe_len); | ||
4800 | } | ||
4801 | |||
4802 | /** | ||
4803 | * ext4_trim_all_free -- function to trim all free space in alloc. group | ||
4804 | * @sb: super block for file system | ||
4805 | * @group: group to be trimmed | ||
4806 | * @start: first group block to examine | ||
4807 | * @max: last group block to examine | ||
4808 | * @minblocks: minimum extent block count | ||
4809 | * | ||
4810 | * ext4_trim_all_free walks through the group's block bitmap searching for | ||
4811 | * free extents. When a free extent is found, ext4_trim_extent is called: | ||
4812 | * the extent is marked as used in the group buddy bitmap, a TRIM command | ||
4813 | * is issued on it, and it is then freed again in the buddy bitmap. This | ||
4814 | * is done until the whole group is scanned. | ||
4819 | */ | ||
4820 | static ext4_grpblk_t | ||
4821 | ext4_trim_all_free(struct super_block *sb, ext4_group_t group, | ||
4822 | ext4_grpblk_t start, ext4_grpblk_t max, | ||
4823 | ext4_grpblk_t minblocks) | ||
4824 | { | ||
4825 | void *bitmap; | ||
4826 | ext4_grpblk_t next, count = 0; | ||
4827 | struct ext4_buddy e4b; | ||
4828 | int ret; | ||
4829 | |||
4830 | ret = ext4_mb_load_buddy(sb, group, &e4b); | ||
4831 | if (ret) { | ||
4832 | ext4_error(sb, "Error in loading buddy " | ||
4833 | "information for %u", group); | ||
4834 | return ret; | ||
4835 | } | ||
4836 | bitmap = e4b.bd_bitmap; | ||
4837 | |||
4838 | ext4_lock_group(sb, group); | ||
4839 | start = (e4b.bd_info->bb_first_free > start) ? | ||
4840 | e4b.bd_info->bb_first_free : start; | ||
4841 | |||
4842 | while (start < max) { | ||
4843 | start = mb_find_next_zero_bit(bitmap, max, start); | ||
4844 | if (start >= max) | ||
4845 | break; | ||
4846 | next = mb_find_next_bit(bitmap, max, start); | ||
4847 | |||
4848 | if ((next - start) >= minblocks) { | ||
4849 | ext4_trim_extent(sb, start, | ||
4850 | next - start, group, &e4b); | ||
4851 | count += next - start; | ||
4852 | } | ||
4853 | start = next + 1; | ||
4854 | |||
4855 | if (fatal_signal_pending(current)) { | ||
4856 | count = -ERESTARTSYS; | ||
4857 | break; | ||
4858 | } | ||
4859 | |||
4860 | if (need_resched()) { | ||
4861 | ext4_unlock_group(sb, group); | ||
4862 | cond_resched(); | ||
4863 | ext4_lock_group(sb, group); | ||
4864 | } | ||
4865 | |||
4866 | if ((e4b.bd_info->bb_free - count) < minblocks) | ||
4867 | break; | ||
4868 | } | ||
4869 | ext4_unlock_group(sb, group); | ||
4870 | ext4_mb_unload_buddy(&e4b); | ||
4871 | |||
4872 | ext4_debug("trimmed %d blocks in the group %d\n", | ||
4873 | count, group); | ||
4874 | |||
4875 | return count; | ||
4876 | } | ||
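Stripped of the locking and rescheduling details, the walk above is the classic find-runs-of-zero-bits scan; a sketch using the same mb_ bitmap helpers, where first_free and trim_run() are hypothetical stand-ins for bb_first_free and ext4_trim_extent():

	ext4_grpblk_t start = first_free, next;

	while (start < max) {
		start = mb_find_next_zero_bit(bitmap, max, start);
		if (start >= max)
			break;
		next = mb_find_next_bit(bitmap, max, start);
		if (next - start >= minblocks)
			trim_run(start, next - start);	/* hypothetical */
		start = next + 1;
	}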
4877 | |||
4878 | /** | ||
4879 | * ext4_trim_fs() -- handler for the FITRIM ioctl | ||
4880 | * @sb: superblock for filesystem | ||
4881 | * @range: fstrim_range structure | ||
4882 | * | ||
4883 | * start: first byte to trim | ||
4884 | * len: number of bytes to trim from start | ||
4885 | * minlen: minimum extent length in bytes | ||
4886 | * ext4_trim_fs goes through all allocation groups containing bytes from | ||
4887 | * start to start+len. For each such group, ext4_trim_all_free() is | ||
4888 | * invoked to trim all free space. | ||
4889 | */ | ||
4890 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | ||
4891 | { | ||
4892 | struct ext4_group_info *grp; | ||
4893 | ext4_group_t first_group, last_group; | ||
4894 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | ||
4895 | ext4_grpblk_t cnt = 0, first_block, last_block; | ||
4896 | uint64_t start, len, minlen, trimmed = 0; | ||
4897 | ext4_fsblk_t first_data_blk = | ||
4898 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | ||
4899 | int ret = 0; | ||
4900 | |||
4901 | start = range->start >> sb->s_blocksize_bits; | ||
4902 | len = range->len >> sb->s_blocksize_bits; | ||
4903 | minlen = range->minlen >> sb->s_blocksize_bits; | ||
4904 | |||
4905 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) | ||
4906 | return -EINVAL; | ||
4907 | if (start < first_data_blk) { | ||
4908 | len -= first_data_blk - start; | ||
4909 | start = first_data_blk; | ||
4910 | } | ||
4911 | |||
4912 | /* Determine first and last group to examine based on start and len */ | ||
4913 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, | ||
4914 | &first_group, &first_block); | ||
4915 | ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), | ||
4916 | &last_group, &last_block); | ||
4917 | last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; | ||
4918 | last_block = EXT4_BLOCKS_PER_GROUP(sb); | ||
4919 | |||
4920 | if (first_group > last_group) | ||
4921 | return -EINVAL; | ||
4922 | |||
4923 | for (group = first_group; group <= last_group; group++) { | ||
4924 | grp = ext4_get_group_info(sb, group); | ||
4925 | /* We only do this if the grp has never been initialized */ | ||
4926 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | ||
4927 | ret = ext4_mb_init_group(sb, group); | ||
4928 | if (ret) | ||
4929 | break; | ||
4930 | } | ||
4931 | |||
4932 | /* | ||
4933 | * For all the groups except the last one, last block will | ||
4934 | * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to | ||
4935 | * change it for the last group, in which case first_block + | ||
4936 | * len < EXT4_BLOCKS_PER_GROUP(sb). | ||
4937 | */ | ||
4938 | if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb)) | ||
4939 | last_block = first_block + len; | ||
4940 | len -= last_block - first_block; | ||
4941 | |||
4942 | if (grp->bb_free >= minlen) { | ||
4943 | cnt = ext4_trim_all_free(sb, group, first_block, | ||
4944 | last_block, minlen); | ||
4945 | if (cnt < 0) { | ||
4946 | ret = cnt; | ||
4947 | break; | ||
4948 | } | ||
4949 | } | ||
4950 | trimmed += cnt; | ||
4951 | first_block = 0; | ||
4952 | } | ||
4953 | range->len = trimmed * sb->s_blocksize; | ||
4954 | |||
4955 | return ret; | ||
4956 | } | ||
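For completeness, the FITRIM ioctl that ends up in ext4_trim_fs() can be exercised from user space with a short program; a minimal sketch (roughly what fstrim(8) does), with error handling kept terse:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

	int main(int argc, char **argv)
	{
		struct fstrim_range range;
		int fd;

		if (argc < 2)
			return 1;
		fd = open(argv[1], O_RDONLY);	/* any path on the fs */
		if (fd < 0) {
			perror("open");
			return 1;
		}
		memset(&range, 0, sizeof(range));
		range.len = ~0ULL;		/* trim all free space */
		if (ioctl(fd, FITRIM, &range) < 0) {
			perror("FITRIM");
			return 1;
		}
		printf("trimmed %llu bytes\n",
		       (unsigned long long)range.len);
		return 0;
	}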