diff options
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 459 |
1 files changed, 258 insertions, 201 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d8a16eecf1d5..859f2ae8864e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -787,6 +787,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
787 | struct inode *inode; | 787 | struct inode *inode; |
788 | char *data; | 788 | char *data; |
789 | char *bitmap; | 789 | char *bitmap; |
790 | struct ext4_group_info *grinfo; | ||
790 | 791 | ||
791 | mb_debug(1, "init page %lu\n", page->index); | 792 | mb_debug(1, "init page %lu\n", page->index); |
792 | 793 | ||
@@ -819,6 +820,18 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
819 | if (first_group + i >= ngroups) | 820 | if (first_group + i >= ngroups) |
820 | break; | 821 | break; |
821 | 822 | ||
823 | grinfo = ext4_get_group_info(sb, first_group + i); | ||
824 | /* | ||
825 | * If page is uptodate then we came here after online resize | ||
826 | * which added some new uninitialized group info structs, so | ||
827 | * we must skip all initialized uptodate buddies on the page, | ||
828 | * which may be currently in use by an allocating task. | ||
829 | */ | ||
830 | if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) { | ||
831 | bh[i] = NULL; | ||
832 | continue; | ||
833 | } | ||
834 | |||
822 | err = -EIO; | 835 | err = -EIO; |
823 | desc = ext4_get_group_desc(sb, first_group + i, NULL); | 836 | desc = ext4_get_group_desc(sb, first_group + i, NULL); |
824 | if (desc == NULL) | 837 | if (desc == NULL) |
@@ -871,26 +884,28 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
871 | } | 884 | } |
872 | 885 | ||
873 | /* wait for I/O completion */ | 886 | /* wait for I/O completion */ |
874 | for (i = 0; i < groups_per_page && bh[i]; i++) | 887 | for (i = 0; i < groups_per_page; i++) |
875 | wait_on_buffer(bh[i]); | 888 | if (bh[i]) |
889 | wait_on_buffer(bh[i]); | ||
876 | 890 | ||
877 | err = -EIO; | 891 | err = -EIO; |
878 | for (i = 0; i < groups_per_page && bh[i]; i++) | 892 | for (i = 0; i < groups_per_page; i++) |
879 | if (!buffer_uptodate(bh[i])) | 893 | if (bh[i] && !buffer_uptodate(bh[i])) |
880 | goto out; | 894 | goto out; |
881 | 895 | ||
882 | err = 0; | 896 | err = 0; |
883 | first_block = page->index * blocks_per_page; | 897 | first_block = page->index * blocks_per_page; |
884 | /* init the page */ | ||
885 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); | ||
886 | for (i = 0; i < blocks_per_page; i++) { | 898 | for (i = 0; i < blocks_per_page; i++) { |
887 | int group; | 899 | int group; |
888 | struct ext4_group_info *grinfo; | ||
889 | 900 | ||
890 | group = (first_block + i) >> 1; | 901 | group = (first_block + i) >> 1; |
891 | if (group >= ngroups) | 902 | if (group >= ngroups) |
892 | break; | 903 | break; |
893 | 904 | ||
905 | if (!bh[group - first_group]) | ||
906 | /* skip initialized uptodate buddy */ | ||
907 | continue; | ||
908 | |||
894 | /* | 909 | /* |
895 | * data carry information regarding this | 910 | * data carry information regarding this |
896 | * particular group in the format specified | 911 | * particular group in the format specified |
@@ -919,6 +934,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
919 | * incore got set to the group block bitmap below | 934 | * incore got set to the group block bitmap below |
920 | */ | 935 | */ |
921 | ext4_lock_group(sb, group); | 936 | ext4_lock_group(sb, group); |
937 | /* init the buddy */ | ||
938 | memset(data, 0xff, blocksize); | ||
922 | ext4_mb_generate_buddy(sb, data, incore, group); | 939 | ext4_mb_generate_buddy(sb, data, incore, group); |
923 | ext4_unlock_group(sb, group); | 940 | ext4_unlock_group(sb, group); |
924 | incore = NULL; | 941 | incore = NULL; |
@@ -948,7 +965,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
948 | 965 | ||
949 | out: | 966 | out: |
950 | if (bh) { | 967 | if (bh) { |
951 | for (i = 0; i < groups_per_page && bh[i]; i++) | 968 | for (i = 0; i < groups_per_page; i++) |
952 | brelse(bh[i]); | 969 | brelse(bh[i]); |
953 | if (bh != &bhs) | 970 | if (bh != &bhs) |
954 | kfree(bh); | 971 | kfree(bh); |
@@ -957,22 +974,21 @@ out: | |||
957 | } | 974 | } |
958 | 975 | ||
959 | /* | 976 | /* |
960 | * lock the group_info alloc_sem of all the groups | 977 | * Lock the buddy and bitmap pages. This makes sure other parallel init_group |
961 | * belonging to the same buddy cache page. This | 978 | * on the same buddy page doesn't happen while holding the buddy page lock. |
962 | * make sure other parallel operation on the buddy | 979 | * Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap |
963 | * cache doesn't happen whild holding the buddy cache | 980 | * are on the same page e4b->bd_buddy_page is NULL and return value is 0. |
964 | * lock | ||
965 | */ | 981 | */ |
966 | static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, | 982 | static int ext4_mb_get_buddy_page_lock(struct super_block *sb, |
967 | ext4_group_t group) | 983 | ext4_group_t group, struct ext4_buddy *e4b) |
968 | { | 984 | { |
969 | int i; | 985 | struct inode *inode = EXT4_SB(sb)->s_buddy_cache; |
970 | int block, pnum; | 986 | int block, pnum, poff; |
971 | int blocks_per_page; | 987 | int blocks_per_page; |
972 | int groups_per_page; | 988 | struct page *page; |
973 | ext4_group_t ngroups = ext4_get_groups_count(sb); | 989 | |
974 | ext4_group_t first_group; | 990 | e4b->bd_buddy_page = NULL; |
975 | struct ext4_group_info *grp; | 991 | e4b->bd_bitmap_page = NULL; |
976 | 992 | ||
977 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 993 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
978 | /* | 994 | /* |
@@ -982,57 +998,40 @@ static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, | |||
982 | */ | 998 | */ |
983 | block = group * 2; | 999 | block = group * 2; |
984 | pnum = block / blocks_per_page; | 1000 | pnum = block / blocks_per_page; |
985 | first_group = pnum * blocks_per_page / 2; | 1001 | poff = block % blocks_per_page; |
986 | 1002 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | |
987 | groups_per_page = blocks_per_page >> 1; | 1003 | if (!page) |
988 | if (groups_per_page == 0) | 1004 | return -EIO; |
989 | groups_per_page = 1; | 1005 | BUG_ON(page->mapping != inode->i_mapping); |
990 | /* read all groups the page covers into the cache */ | 1006 | e4b->bd_bitmap_page = page; |
991 | for (i = 0; i < groups_per_page; i++) { | 1007 | e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); |
992 | 1008 | ||
993 | if ((first_group + i) >= ngroups) | 1009 | if (blocks_per_page >= 2) { |
994 | break; | 1010 | /* buddy and bitmap are on the same page */ |
995 | grp = ext4_get_group_info(sb, first_group + i); | 1011 | return 0; |
996 | /* take all groups write allocation | ||
997 | * semaphore. This make sure there is | ||
998 | * no block allocation going on in any | ||
999 | * of that groups | ||
1000 | */ | ||
1001 | down_write_nested(&grp->alloc_sem, i); | ||
1002 | } | 1012 | } |
1003 | return i; | 1013 | |
1014 | block++; | ||
1015 | pnum = block / blocks_per_page; | ||
1016 | poff = block % blocks_per_page; | ||
1017 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1018 | if (!page) | ||
1019 | return -EIO; | ||
1020 | BUG_ON(page->mapping != inode->i_mapping); | ||
1021 | e4b->bd_buddy_page = page; | ||
1022 | return 0; | ||
1004 | } | 1023 | } |
1005 | 1024 | ||
1006 | static void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | 1025 | static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b) |
1007 | ext4_group_t group, int locked_group) | ||
1008 | { | 1026 | { |
1009 | int i; | 1027 | if (e4b->bd_bitmap_page) { |
1010 | int block, pnum; | 1028 | unlock_page(e4b->bd_bitmap_page); |
1011 | int blocks_per_page; | 1029 | page_cache_release(e4b->bd_bitmap_page); |
1012 | ext4_group_t first_group; | 1030 | } |
1013 | struct ext4_group_info *grp; | 1031 | if (e4b->bd_buddy_page) { |
1014 | 1032 | unlock_page(e4b->bd_buddy_page); | |
1015 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 1033 | page_cache_release(e4b->bd_buddy_page); |
1016 | /* | ||
1017 | * the buddy cache inode stores the block bitmap | ||
1018 | * and buddy information in consecutive blocks. | ||
1019 | * So for each group we need two blocks. | ||
1020 | */ | ||
1021 | block = group * 2; | ||
1022 | pnum = block / blocks_per_page; | ||
1023 | first_group = pnum * blocks_per_page / 2; | ||
1024 | /* release locks on all the groups */ | ||
1025 | for (i = 0; i < locked_group; i++) { | ||
1026 | |||
1027 | grp = ext4_get_group_info(sb, first_group + i); | ||
1028 | /* take all groups write allocation | ||
1029 | * semaphore. This make sure there is | ||
1030 | * no block allocation going on in any | ||
1031 | * of that groups | ||
1032 | */ | ||
1033 | up_write(&grp->alloc_sem); | ||
1034 | } | 1034 | } |
1035 | |||
1036 | } | 1035 | } |
1037 | 1036 | ||
1038 | /* | 1037 | /* |
@@ -1044,93 +1043,60 @@ static noinline_for_stack | |||
1044 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | 1043 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) |
1045 | { | 1044 | { |
1046 | 1045 | ||
1047 | int ret = 0; | ||
1048 | void *bitmap; | ||
1049 | int blocks_per_page; | ||
1050 | int block, pnum, poff; | ||
1051 | int num_grp_locked = 0; | ||
1052 | struct ext4_group_info *this_grp; | 1046 | struct ext4_group_info *this_grp; |
1053 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1047 | struct ext4_buddy e4b; |
1054 | struct inode *inode = sbi->s_buddy_cache; | 1048 | struct page *page; |
1055 | struct page *page = NULL, *bitmap_page = NULL; | 1049 | int ret = 0; |
1056 | 1050 | ||
1057 | mb_debug(1, "init group %u\n", group); | 1051 | mb_debug(1, "init group %u\n", group); |
1058 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1059 | this_grp = ext4_get_group_info(sb, group); | 1052 | this_grp = ext4_get_group_info(sb, group); |
1060 | /* | 1053 | /* |
1061 | * This ensures that we don't reinit the buddy cache | 1054 | * This ensures that we don't reinit the buddy cache |
1062 | * page which map to the group from which we are already | 1055 | * page which map to the group from which we are already |
1063 | * allocating. If we are looking at the buddy cache we would | 1056 | * allocating. If we are looking at the buddy cache we would |
1064 | * have taken a reference using ext4_mb_load_buddy and that | 1057 | * have taken a reference using ext4_mb_load_buddy and that |
1065 | * would have taken the alloc_sem lock. | 1058 | * would have pinned buddy page to page cache. |
1066 | */ | 1059 | */ |
1067 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | 1060 | ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b); |
1068 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | 1061 | if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { |
1069 | /* | 1062 | /* |
1070 | * somebody initialized the group | 1063 | * somebody initialized the group |
1071 | * return without doing anything | 1064 | * return without doing anything |
1072 | */ | 1065 | */ |
1073 | ret = 0; | ||
1074 | goto err; | 1066 | goto err; |
1075 | } | 1067 | } |
1076 | /* | 1068 | |
1077 | * the buddy cache inode stores the block bitmap | 1069 | page = e4b.bd_bitmap_page; |
1078 | * and buddy information in consecutive blocks. | 1070 | ret = ext4_mb_init_cache(page, NULL); |
1079 | * So for each group we need two blocks. | 1071 | if (ret) |
1080 | */ | 1072 | goto err; |
1081 | block = group * 2; | 1073 | if (!PageUptodate(page)) { |
1082 | pnum = block / blocks_per_page; | ||
1083 | poff = block % blocks_per_page; | ||
1084 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1085 | if (page) { | ||
1086 | BUG_ON(page->mapping != inode->i_mapping); | ||
1087 | ret = ext4_mb_init_cache(page, NULL); | ||
1088 | if (ret) { | ||
1089 | unlock_page(page); | ||
1090 | goto err; | ||
1091 | } | ||
1092 | unlock_page(page); | ||
1093 | } | ||
1094 | if (page == NULL || !PageUptodate(page)) { | ||
1095 | ret = -EIO; | 1074 | ret = -EIO; |
1096 | goto err; | 1075 | goto err; |
1097 | } | 1076 | } |
1098 | mark_page_accessed(page); | 1077 | mark_page_accessed(page); |
1099 | bitmap_page = page; | ||
1100 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1101 | 1078 | ||
1102 | /* init buddy cache */ | 1079 | if (e4b.bd_buddy_page == NULL) { |
1103 | block++; | ||
1104 | pnum = block / blocks_per_page; | ||
1105 | poff = block % blocks_per_page; | ||
1106 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1107 | if (page == bitmap_page) { | ||
1108 | /* | 1080 | /* |
1109 | * If both the bitmap and buddy are in | 1081 | * If both the bitmap and buddy are in |
1110 | * the same page we don't need to force | 1082 | * the same page we don't need to force |
1111 | * init the buddy | 1083 | * init the buddy |
1112 | */ | 1084 | */ |
1113 | unlock_page(page); | 1085 | ret = 0; |
1114 | } else if (page) { | 1086 | goto err; |
1115 | BUG_ON(page->mapping != inode->i_mapping); | ||
1116 | ret = ext4_mb_init_cache(page, bitmap); | ||
1117 | if (ret) { | ||
1118 | unlock_page(page); | ||
1119 | goto err; | ||
1120 | } | ||
1121 | unlock_page(page); | ||
1122 | } | 1087 | } |
1123 | if (page == NULL || !PageUptodate(page)) { | 1088 | /* init buddy cache */ |
1089 | page = e4b.bd_buddy_page; | ||
1090 | ret = ext4_mb_init_cache(page, e4b.bd_bitmap); | ||
1091 | if (ret) | ||
1092 | goto err; | ||
1093 | if (!PageUptodate(page)) { | ||
1124 | ret = -EIO; | 1094 | ret = -EIO; |
1125 | goto err; | 1095 | goto err; |
1126 | } | 1096 | } |
1127 | mark_page_accessed(page); | 1097 | mark_page_accessed(page); |
1128 | err: | 1098 | err: |
1129 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | 1099 | ext4_mb_put_buddy_page_lock(&e4b); |
1130 | if (bitmap_page) | ||
1131 | page_cache_release(bitmap_page); | ||
1132 | if (page) | ||
1133 | page_cache_release(page); | ||
1134 | return ret; | 1100 | return ret; |
1135 | } | 1101 | } |
1136 | 1102 | ||
@@ -1164,24 +1130,8 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
1164 | e4b->bd_group = group; | 1130 | e4b->bd_group = group; |
1165 | e4b->bd_buddy_page = NULL; | 1131 | e4b->bd_buddy_page = NULL; |
1166 | e4b->bd_bitmap_page = NULL; | 1132 | e4b->bd_bitmap_page = NULL; |
1167 | e4b->alloc_semp = &grp->alloc_sem; | ||
1168 | |||
1169 | /* Take the read lock on the group alloc | ||
1170 | * sem. This would make sure a parallel | ||
1171 | * ext4_mb_init_group happening on other | ||
1172 | * groups mapped by the page is blocked | ||
1173 | * till we are done with allocation | ||
1174 | */ | ||
1175 | repeat_load_buddy: | ||
1176 | down_read(e4b->alloc_semp); | ||
1177 | 1133 | ||
1178 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | 1134 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
1179 | /* we need to check for group need init flag | ||
1180 | * with alloc_semp held so that we can be sure | ||
1181 | * that new blocks didn't get added to the group | ||
1182 | * when we are loading the buddy cache | ||
1183 | */ | ||
1184 | up_read(e4b->alloc_semp); | ||
1185 | /* | 1135 | /* |
1186 | * we need full data about the group | 1136 | * we need full data about the group |
1187 | * to make a good selection | 1137 | * to make a good selection |
@@ -1189,7 +1139,6 @@ repeat_load_buddy: | |||
1189 | ret = ext4_mb_init_group(sb, group); | 1139 | ret = ext4_mb_init_group(sb, group); |
1190 | if (ret) | 1140 | if (ret) |
1191 | return ret; | 1141 | return ret; |
1192 | goto repeat_load_buddy; | ||
1193 | } | 1142 | } |
1194 | 1143 | ||
1195 | /* | 1144 | /* |
@@ -1273,15 +1222,14 @@ repeat_load_buddy: | |||
1273 | return 0; | 1222 | return 0; |
1274 | 1223 | ||
1275 | err: | 1224 | err: |
1225 | if (page) | ||
1226 | page_cache_release(page); | ||
1276 | if (e4b->bd_bitmap_page) | 1227 | if (e4b->bd_bitmap_page) |
1277 | page_cache_release(e4b->bd_bitmap_page); | 1228 | page_cache_release(e4b->bd_bitmap_page); |
1278 | if (e4b->bd_buddy_page) | 1229 | if (e4b->bd_buddy_page) |
1279 | page_cache_release(e4b->bd_buddy_page); | 1230 | page_cache_release(e4b->bd_buddy_page); |
1280 | e4b->bd_buddy = NULL; | 1231 | e4b->bd_buddy = NULL; |
1281 | e4b->bd_bitmap = NULL; | 1232 | e4b->bd_bitmap = NULL; |
1282 | |||
1283 | /* Done with the buddy cache */ | ||
1284 | up_read(e4b->alloc_semp); | ||
1285 | return ret; | 1233 | return ret; |
1286 | } | 1234 | } |
1287 | 1235 | ||
@@ -1291,9 +1239,6 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) | |||
1291 | page_cache_release(e4b->bd_bitmap_page); | 1239 | page_cache_release(e4b->bd_bitmap_page); |
1292 | if (e4b->bd_buddy_page) | 1240 | if (e4b->bd_buddy_page) |
1293 | page_cache_release(e4b->bd_buddy_page); | 1241 | page_cache_release(e4b->bd_buddy_page); |
1294 | /* Done with the buddy cache */ | ||
1295 | if (e4b->alloc_semp) | ||
1296 | up_read(e4b->alloc_semp); | ||
1297 | } | 1242 | } |
1298 | 1243 | ||
1299 | 1244 | ||
@@ -1606,9 +1551,6 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, | |||
1606 | get_page(ac->ac_bitmap_page); | 1551 | get_page(ac->ac_bitmap_page); |
1607 | ac->ac_buddy_page = e4b->bd_buddy_page; | 1552 | ac->ac_buddy_page = e4b->bd_buddy_page; |
1608 | get_page(ac->ac_buddy_page); | 1553 | get_page(ac->ac_buddy_page); |
1609 | /* on allocation we use ac to track the held semaphore */ | ||
1610 | ac->alloc_semp = e4b->alloc_semp; | ||
1611 | e4b->alloc_semp = NULL; | ||
1612 | /* store last allocated for subsequent stream allocation */ | 1554 | /* store last allocated for subsequent stream allocation */ |
1613 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { | 1555 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
1614 | spin_lock(&sbi->s_md_lock); | 1556 | spin_lock(&sbi->s_md_lock); |
@@ -2659,7 +2601,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2659 | struct super_block *sb = journal->j_private; | 2601 | struct super_block *sb = journal->j_private; |
2660 | struct ext4_buddy e4b; | 2602 | struct ext4_buddy e4b; |
2661 | struct ext4_group_info *db; | 2603 | struct ext4_group_info *db; |
2662 | int err, ret, count = 0, count2 = 0; | 2604 | int err, count = 0, count2 = 0; |
2663 | struct ext4_free_data *entry; | 2605 | struct ext4_free_data *entry; |
2664 | struct list_head *l, *ltmp; | 2606 | struct list_head *l, *ltmp; |
2665 | 2607 | ||
@@ -2669,15 +2611,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2669 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", | 2611 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
2670 | entry->count, entry->group, entry); | 2612 | entry->count, entry->group, entry); |
2671 | 2613 | ||
2672 | if (test_opt(sb, DISCARD)) { | 2614 | if (test_opt(sb, DISCARD)) |
2673 | ret = ext4_issue_discard(sb, entry->group, | 2615 | ext4_issue_discard(sb, entry->group, |
2674 | entry->start_blk, entry->count); | 2616 | entry->start_blk, entry->count); |
2675 | if (unlikely(ret == -EOPNOTSUPP)) { | ||
2676 | ext4_warning(sb, "discard not supported, " | ||
2677 | "disabling"); | ||
2678 | clear_opt(sb, DISCARD); | ||
2679 | } | ||
2680 | } | ||
2681 | 2617 | ||
2682 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2618 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2683 | /* we expect to find existing buddy because it's pinned */ | 2619 | /* we expect to find existing buddy because it's pinned */ |
@@ -4226,15 +4162,12 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) | |||
4226 | spin_unlock(&pa->pa_lock); | 4162 | spin_unlock(&pa->pa_lock); |
4227 | } | 4163 | } |
4228 | } | 4164 | } |
4229 | if (ac->alloc_semp) | ||
4230 | up_read(ac->alloc_semp); | ||
4231 | if (pa) { | 4165 | if (pa) { |
4232 | /* | 4166 | /* |
4233 | * We want to add the pa to the right bucket. | 4167 | * We want to add the pa to the right bucket. |
4234 | * Remove it from the list and while adding | 4168 | * Remove it from the list and while adding |
4235 | * make sure the list to which we are adding | 4169 | * make sure the list to which we are adding |
4236 | * doesn't grow big. We need to release | 4170 | * doesn't grow big. |
4237 | * alloc_semp before calling ext4_mb_add_n_trim() | ||
4238 | */ | 4171 | */ |
4239 | if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { | 4172 | if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { |
4240 | spin_lock(pa->pa_obj_lock); | 4173 | spin_lock(pa->pa_obj_lock); |
@@ -4303,7 +4236,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4303 | * there is enough free blocks to do block allocation | 4236 | * there is enough free blocks to do block allocation |
4304 | * and verify allocation doesn't exceed the quota limits. | 4237 | * and verify allocation doesn't exceed the quota limits. |
4305 | */ | 4238 | */ |
4306 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { | 4239 | while (ar->len && |
4240 | ext4_claim_free_blocks(sbi, ar->len, ar->flags)) { | ||
4241 | |||
4307 | /* let others to free the space */ | 4242 | /* let others to free the space */ |
4308 | yield(); | 4243 | yield(); |
4309 | ar->len = ar->len >> 1; | 4244 | ar->len = ar->len >> 1; |
@@ -4313,9 +4248,15 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4313 | return 0; | 4248 | return 0; |
4314 | } | 4249 | } |
4315 | reserv_blks = ar->len; | 4250 | reserv_blks = ar->len; |
4316 | while (ar->len && dquot_alloc_block(ar->inode, ar->len)) { | 4251 | if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { |
4317 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4252 | dquot_alloc_block_nofail(ar->inode, ar->len); |
4318 | ar->len--; | 4253 | } else { |
4254 | while (ar->len && | ||
4255 | dquot_alloc_block(ar->inode, ar->len)) { | ||
4256 | |||
4257 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | ||
4258 | ar->len--; | ||
4259 | } | ||
4319 | } | 4260 | } |
4320 | inquota = ar->len; | 4261 | inquota = ar->len; |
4321 | if (ar->len == 0) { | 4262 | if (ar->len == 0) { |
@@ -4704,6 +4645,127 @@ error_return: | |||
4704 | } | 4645 | } |
4705 | 4646 | ||
4706 | /** | 4647 | /** |
4648 | * ext4_add_groupblocks() -- Add given blocks to an existing group | ||
4649 | * @handle: handle to this transaction | ||
4650 | * @sb: super block | ||
4651 | * @block: start physical block to add to the block group | ||
4652 | * @count: number of blocks to free | ||
4653 | * | ||
4654 | * This marks the blocks as free in the bitmap and buddy. | ||
4655 | */ | ||
4656 | void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | ||
4657 | ext4_fsblk_t block, unsigned long count) | ||
4658 | { | ||
4659 | struct buffer_head *bitmap_bh = NULL; | ||
4660 | struct buffer_head *gd_bh; | ||
4661 | ext4_group_t block_group; | ||
4662 | ext4_grpblk_t bit; | ||
4663 | unsigned int i; | ||
4664 | struct ext4_group_desc *desc; | ||
4665 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4666 | struct ext4_buddy e4b; | ||
4667 | int err = 0, ret, blk_free_count; | ||
4668 | ext4_grpblk_t blocks_freed; | ||
4669 | struct ext4_group_info *grp; | ||
4670 | |||
4671 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); | ||
4672 | |||
4673 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | ||
4674 | grp = ext4_get_group_info(sb, block_group); | ||
4675 | /* | ||
4676 | * Check to see if we are freeing blocks across a group | ||
4677 | * boundary. | ||
4678 | */ | ||
4679 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) | ||
4680 | goto error_return; | ||
4681 | |||
4682 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | ||
4683 | if (!bitmap_bh) | ||
4684 | goto error_return; | ||
4685 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); | ||
4686 | if (!desc) | ||
4687 | goto error_return; | ||
4688 | |||
4689 | if (in_range(ext4_block_bitmap(sb, desc), block, count) || | ||
4690 | in_range(ext4_inode_bitmap(sb, desc), block, count) || | ||
4691 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | ||
4692 | in_range(block + count - 1, ext4_inode_table(sb, desc), | ||
4693 | sbi->s_itb_per_group)) { | ||
4694 | ext4_error(sb, "Adding blocks in system zones - " | ||
4695 | "Block = %llu, count = %lu", | ||
4696 | block, count); | ||
4697 | goto error_return; | ||
4698 | } | ||
4699 | |||
4700 | BUFFER_TRACE(bitmap_bh, "getting write access"); | ||
4701 | err = ext4_journal_get_write_access(handle, bitmap_bh); | ||
4702 | if (err) | ||
4703 | goto error_return; | ||
4704 | |||
4705 | /* | ||
4706 | * We are about to modify some metadata. Call the journal APIs | ||
4707 | * to unshare ->b_data if a currently-committing transaction is | ||
4708 | * using it | ||
4709 | */ | ||
4710 | BUFFER_TRACE(gd_bh, "get_write_access"); | ||
4711 | err = ext4_journal_get_write_access(handle, gd_bh); | ||
4712 | if (err) | ||
4713 | goto error_return; | ||
4714 | |||
4715 | for (i = 0, blocks_freed = 0; i < count; i++) { | ||
4716 | BUFFER_TRACE(bitmap_bh, "clear bit"); | ||
4717 | if (!mb_test_bit(bit + i, bitmap_bh->b_data)) { | ||
4718 | ext4_error(sb, "bit already cleared for block %llu", | ||
4719 | (ext4_fsblk_t)(block + i)); | ||
4720 | BUFFER_TRACE(bitmap_bh, "bit already cleared"); | ||
4721 | } else { | ||
4722 | blocks_freed++; | ||
4723 | } | ||
4724 | } | ||
4725 | |||
4726 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | ||
4727 | if (err) | ||
4728 | goto error_return; | ||
4729 | |||
4730 | /* | ||
4731 | * need to update group_info->bb_free and bitmap | ||
4732 | * with group lock held. generate_buddy looks at | ||
4733 | * them with group lock held | ||
4734 | */ | ||
4735 | ext4_lock_group(sb, block_group); | ||
4736 | mb_clear_bits(bitmap_bh->b_data, bit, count); | ||
4737 | mb_free_blocks(NULL, &e4b, bit, count); | ||
4738 | blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); | ||
4739 | ext4_free_blks_set(sb, desc, blk_free_count); | ||
4740 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); | ||
4741 | ext4_unlock_group(sb, block_group); | ||
4742 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); | ||
4743 | |||
4744 | if (sbi->s_log_groups_per_flex) { | ||
4745 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | ||
4746 | atomic_add(blocks_freed, | ||
4747 | &sbi->s_flex_groups[flex_group].free_blocks); | ||
4748 | } | ||
4749 | |||
4750 | ext4_mb_unload_buddy(&e4b); | ||
4751 | |||
4752 | /* We dirtied the bitmap block */ | ||
4753 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | ||
4754 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | ||
4755 | |||
4756 | /* And the group descriptor block */ | ||
4757 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); | ||
4758 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); | ||
4759 | if (!err) | ||
4760 | err = ret; | ||
4761 | |||
4762 | error_return: | ||
4763 | brelse(bitmap_bh); | ||
4764 | ext4_std_error(sb, err); | ||
4765 | return; | ||
4766 | } | ||
4767 | |||
4768 | /** | ||
4707 | * ext4_trim_extent -- function to TRIM one single free extent in the group | 4769 | * ext4_trim_extent -- function to TRIM one single free extent in the group |
4708 | * @sb: super block for the file system | 4770 | * @sb: super block for the file system |
4709 | * @start: starting block of the free extent in the alloc. group | 4771 | * @start: starting block of the free extent in the alloc. group |
@@ -4715,11 +4777,10 @@ error_return: | |||
4715 | * one will allocate those blocks, mark it as used in buddy bitmap. This must | 4777 | * one will allocate those blocks, mark it as used in buddy bitmap. This must |
4716 | * be called under the group lock. | 4778 | * be called under the group lock. |
4717 | */ | 4779 | */ |
4718 | static int ext4_trim_extent(struct super_block *sb, int start, int count, | 4780 | static void ext4_trim_extent(struct super_block *sb, int start, int count, |
4719 | ext4_group_t group, struct ext4_buddy *e4b) | 4781 | ext4_group_t group, struct ext4_buddy *e4b) |
4720 | { | 4782 | { |
4721 | struct ext4_free_extent ex; | 4783 | struct ext4_free_extent ex; |
4722 | int ret = 0; | ||
4723 | 4784 | ||
4724 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); | 4785 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); |
4725 | 4786 | ||
@@ -4733,12 +4794,9 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, | |||
4733 | */ | 4794 | */ |
4734 | mb_mark_used(e4b, &ex); | 4795 | mb_mark_used(e4b, &ex); |
4735 | ext4_unlock_group(sb, group); | 4796 | ext4_unlock_group(sb, group); |
4736 | 4797 | ext4_issue_discard(sb, group, start, count); | |
4737 | ret = ext4_issue_discard(sb, group, start, count); | ||
4738 | |||
4739 | ext4_lock_group(sb, group); | 4798 | ext4_lock_group(sb, group); |
4740 | mb_free_blocks(NULL, e4b, start, ex.fe_len); | 4799 | mb_free_blocks(NULL, e4b, start, ex.fe_len); |
4741 | return ret; | ||
4742 | } | 4800 | } |
4743 | 4801 | ||
4744 | /** | 4802 | /** |
@@ -4760,21 +4818,26 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, | |||
4760 | * the group buddy bitmap. This is done until whole group is scanned. | 4818 | * the group buddy bitmap. This is done until whole group is scanned. |
4761 | */ | 4819 | */ |
4762 | static ext4_grpblk_t | 4820 | static ext4_grpblk_t |
4763 | ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | 4821 | ext4_trim_all_free(struct super_block *sb, ext4_group_t group, |
4764 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) | 4822 | ext4_grpblk_t start, ext4_grpblk_t max, |
4823 | ext4_grpblk_t minblocks) | ||
4765 | { | 4824 | { |
4766 | void *bitmap; | 4825 | void *bitmap; |
4767 | ext4_grpblk_t next, count = 0; | 4826 | ext4_grpblk_t next, count = 0; |
4768 | ext4_group_t group; | 4827 | struct ext4_buddy e4b; |
4769 | int ret = 0; | 4828 | int ret; |
4770 | 4829 | ||
4771 | BUG_ON(e4b == NULL); | 4830 | ret = ext4_mb_load_buddy(sb, group, &e4b); |
4831 | if (ret) { | ||
4832 | ext4_error(sb, "Error in loading buddy " | ||
4833 | "information for %u", group); | ||
4834 | return ret; | ||
4835 | } | ||
4836 | bitmap = e4b.bd_bitmap; | ||
4772 | 4837 | ||
4773 | bitmap = e4b->bd_bitmap; | ||
4774 | group = e4b->bd_group; | ||
4775 | start = (e4b->bd_info->bb_first_free > start) ? | ||
4776 | e4b->bd_info->bb_first_free : start; | ||
4777 | ext4_lock_group(sb, group); | 4838 | ext4_lock_group(sb, group); |
4839 | start = (e4b.bd_info->bb_first_free > start) ? | ||
4840 | e4b.bd_info->bb_first_free : start; | ||
4778 | 4841 | ||
4779 | while (start < max) { | 4842 | while (start < max) { |
4780 | start = mb_find_next_zero_bit(bitmap, max, start); | 4843 | start = mb_find_next_zero_bit(bitmap, max, start); |
@@ -4783,10 +4846,8 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | |||
4783 | next = mb_find_next_bit(bitmap, max, start); | 4846 | next = mb_find_next_bit(bitmap, max, start); |
4784 | 4847 | ||
4785 | if ((next - start) >= minblocks) { | 4848 | if ((next - start) >= minblocks) { |
4786 | ret = ext4_trim_extent(sb, start, | 4849 | ext4_trim_extent(sb, start, |
4787 | next - start, group, e4b); | 4850 | next - start, group, &e4b); |
4788 | if (ret < 0) | ||
4789 | break; | ||
4790 | count += next - start; | 4851 | count += next - start; |
4791 | } | 4852 | } |
4792 | start = next + 1; | 4853 | start = next + 1; |
@@ -4802,17 +4863,15 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | |||
4802 | ext4_lock_group(sb, group); | 4863 | ext4_lock_group(sb, group); |
4803 | } | 4864 | } |
4804 | 4865 | ||
4805 | if ((e4b->bd_info->bb_free - count) < minblocks) | 4866 | if ((e4b.bd_info->bb_free - count) < minblocks) |
4806 | break; | 4867 | break; |
4807 | } | 4868 | } |
4808 | ext4_unlock_group(sb, group); | 4869 | ext4_unlock_group(sb, group); |
4870 | ext4_mb_unload_buddy(&e4b); | ||
4809 | 4871 | ||
4810 | ext4_debug("trimmed %d blocks in the group %d\n", | 4872 | ext4_debug("trimmed %d blocks in the group %d\n", |
4811 | count, group); | 4873 | count, group); |
4812 | 4874 | ||
4813 | if (ret < 0) | ||
4814 | count = ret; | ||
4815 | |||
4816 | return count; | 4875 | return count; |
4817 | } | 4876 | } |
4818 | 4877 | ||
@@ -4830,11 +4889,11 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | |||
4830 | */ | 4889 | */ |
4831 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | 4890 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) |
4832 | { | 4891 | { |
4833 | struct ext4_buddy e4b; | 4892 | struct ext4_group_info *grp; |
4834 | ext4_group_t first_group, last_group; | 4893 | ext4_group_t first_group, last_group; |
4835 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | 4894 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); |
4836 | ext4_grpblk_t cnt = 0, first_block, last_block; | 4895 | ext4_grpblk_t cnt = 0, first_block, last_block; |
4837 | uint64_t start, len, minlen, trimmed; | 4896 | uint64_t start, len, minlen, trimmed = 0; |
4838 | ext4_fsblk_t first_data_blk = | 4897 | ext4_fsblk_t first_data_blk = |
4839 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 4898 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
4840 | int ret = 0; | 4899 | int ret = 0; |
@@ -4842,7 +4901,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4842 | start = range->start >> sb->s_blocksize_bits; | 4901 | start = range->start >> sb->s_blocksize_bits; |
4843 | len = range->len >> sb->s_blocksize_bits; | 4902 | len = range->len >> sb->s_blocksize_bits; |
4844 | minlen = range->minlen >> sb->s_blocksize_bits; | 4903 | minlen = range->minlen >> sb->s_blocksize_bits; |
4845 | trimmed = 0; | ||
4846 | 4904 | ||
4847 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) | 4905 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) |
4848 | return -EINVAL; | 4906 | return -EINVAL; |
@@ -4863,11 +4921,12 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4863 | return -EINVAL; | 4921 | return -EINVAL; |
4864 | 4922 | ||
4865 | for (group = first_group; group <= last_group; group++) { | 4923 | for (group = first_group; group <= last_group; group++) { |
4866 | ret = ext4_mb_load_buddy(sb, group, &e4b); | 4924 | grp = ext4_get_group_info(sb, group); |
4867 | if (ret) { | 4925 | /* We only do this if the grp has never been initialized */ |
4868 | ext4_error(sb, "Error in loading buddy " | 4926 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
4869 | "information for %u", group); | 4927 | ret = ext4_mb_init_group(sb, group); |
4870 | break; | 4928 | if (ret) |
4929 | break; | ||
4871 | } | 4930 | } |
4872 | 4931 | ||
4873 | /* | 4932 | /* |
@@ -4880,16 +4939,14 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4880 | last_block = first_block + len; | 4939 | last_block = first_block + len; |
4881 | len -= last_block - first_block; | 4940 | len -= last_block - first_block; |
4882 | 4941 | ||
4883 | if (e4b.bd_info->bb_free >= minlen) { | 4942 | if (grp->bb_free >= minlen) { |
4884 | cnt = ext4_trim_all_free(sb, &e4b, first_block, | 4943 | cnt = ext4_trim_all_free(sb, group, first_block, |
4885 | last_block, minlen); | 4944 | last_block, minlen); |
4886 | if (cnt < 0) { | 4945 | if (cnt < 0) { |
4887 | ret = cnt; | 4946 | ret = cnt; |
4888 | ext4_mb_unload_buddy(&e4b); | ||
4889 | break; | 4947 | break; |
4890 | } | 4948 | } |
4891 | } | 4949 | } |
4892 | ext4_mb_unload_buddy(&e4b); | ||
4893 | trimmed += cnt; | 4950 | trimmed += cnt; |
4894 | first_block = 0; | 4951 | first_block = 0; |
4895 | } | 4952 | } |