aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChao Yu <yuchao0@huawei.com>2018-08-06 10:43:50 -0400
committerJaegeuk Kim <jaegeuk@kernel.org>2018-08-13 13:48:17 -0400
commit35ec7d5748849762008e8ae9f8ad2766229d5794 (patch)
tree0038876fb5399028e19155c391efe79cd5b994cb
parenta690efffd16302d23b0bbd00f84dcedc11935a8b (diff)
f2fs: split discard command prior to block layer
Some devices have small max_{hw,}discard_sectors, so that in __blkdev_issue_discard(), one big discard bio can be split into multiple small discard bios, resulting in a heavy load on the IO scheduler and device, which can hang other sync IO for a long time. Now, f2fs is trying to control discard commands more elaborately, in order to reduce conflict between discard IO and user IO and enhance application performance, so in this patch, we split the discard bio in f2fs prior to the block layer, to avoid issuing multiple discard bios in a short time. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
-rw-r--r--fs/f2fs/f2fs.h23
-rw-r--r--fs/f2fs/segment.c148
2 files changed, 127 insertions, 44 deletions
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d9df58163f29..9fb780317b4e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -178,7 +178,6 @@ enum {
178 178
179#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) 179#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
180#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ 180#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
181#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */
182#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ 181#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
183#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */ 182#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */
184#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ 183#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
@@ -250,9 +249,10 @@ struct discard_entry {
250 (MAX_PLIST_NUM - 1) : (blk_num - 1)) 249 (MAX_PLIST_NUM - 1) : (blk_num - 1))
251 250
252enum { 251enum {
253 D_PREP, 252 D_PREP, /* initial */
254 D_SUBMIT, 253 D_PARTIAL, /* partially submitted */
255 D_DONE, 254 D_SUBMIT, /* all submitted */
255 D_DONE, /* finished */
256}; 256};
257 257
258struct discard_info { 258struct discard_info {
@@ -277,7 +277,10 @@ struct discard_cmd {
277 struct block_device *bdev; /* bdev */ 277 struct block_device *bdev; /* bdev */
278 unsigned short ref; /* reference count */ 278 unsigned short ref; /* reference count */
279 unsigned char state; /* state */ 279 unsigned char state; /* state */
280 unsigned char issuing; /* issuing discard */
280 int error; /* bio error */ 281 int error; /* bio error */
282 spinlock_t lock; /* for state/bio_ref updating */
283 unsigned short bio_ref; /* bio reference count */
281}; 284};
282 285
283enum { 286enum {
@@ -710,22 +713,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
710} 713}
711 714
712static inline bool __is_discard_mergeable(struct discard_info *back, 715static inline bool __is_discard_mergeable(struct discard_info *back,
713 struct discard_info *front) 716 struct discard_info *front, unsigned int max_len)
714{ 717{
715 return (back->lstart + back->len == front->lstart) && 718 return (back->lstart + back->len == front->lstart) &&
716 (back->len + front->len < DEF_MAX_DISCARD_LEN); 719 (back->len + front->len <= max_len);
717} 720}
718 721
719static inline bool __is_discard_back_mergeable(struct discard_info *cur, 722static inline bool __is_discard_back_mergeable(struct discard_info *cur,
720 struct discard_info *back) 723 struct discard_info *back, unsigned int max_len)
721{ 724{
722 return __is_discard_mergeable(back, cur); 725 return __is_discard_mergeable(back, cur, max_len);
723} 726}
724 727
725static inline bool __is_discard_front_mergeable(struct discard_info *cur, 728static inline bool __is_discard_front_mergeable(struct discard_info *cur,
726 struct discard_info *front) 729 struct discard_info *front, unsigned int max_len)
727{ 730{
728 return __is_discard_mergeable(cur, front); 731 return __is_discard_mergeable(cur, front, max_len);
729} 732}
730 733
731static inline bool __is_extent_mergeable(struct extent_info *back, 734static inline bool __is_extent_mergeable(struct extent_info *back,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index eada91dae08a..540d7d6161ba 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -839,9 +839,12 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
839 dc->len = len; 839 dc->len = len;
840 dc->ref = 0; 840 dc->ref = 0;
841 dc->state = D_PREP; 841 dc->state = D_PREP;
842 dc->issuing = 0;
842 dc->error = 0; 843 dc->error = 0;
843 init_completion(&dc->wait); 844 init_completion(&dc->wait);
844 list_add_tail(&dc->list, pend_list); 845 list_add_tail(&dc->list, pend_list);
846 spin_lock_init(&dc->lock);
847 dc->bio_ref = 0;
845 atomic_inc(&dcc->discard_cmd_cnt); 848 atomic_inc(&dcc->discard_cmd_cnt);
846 dcc->undiscard_blks += len; 849 dcc->undiscard_blks += len;
847 850
@@ -868,7 +871,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
868 struct discard_cmd *dc) 871 struct discard_cmd *dc)
869{ 872{
870 if (dc->state == D_DONE) 873 if (dc->state == D_DONE)
871 atomic_dec(&dcc->issing_discard); 874 atomic_sub(dc->issuing, &dcc->issing_discard);
872 875
873 list_del(&dc->list); 876 list_del(&dc->list);
874 rb_erase(&dc->rb_node, &dcc->root); 877 rb_erase(&dc->rb_node, &dcc->root);
@@ -883,9 +886,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
883 struct discard_cmd *dc) 886 struct discard_cmd *dc)
884{ 887{
885 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 888 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
889 unsigned long flags;
886 890
887 trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len); 891 trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
888 892
893 spin_lock_irqsave(&dc->lock, flags);
894 if (dc->bio_ref) {
895 spin_unlock_irqrestore(&dc->lock, flags);
896 return;
897 }
898 spin_unlock_irqrestore(&dc->lock, flags);
899
889 f2fs_bug_on(sbi, dc->ref); 900 f2fs_bug_on(sbi, dc->ref);
890 901
891 if (dc->error == -EOPNOTSUPP) 902 if (dc->error == -EOPNOTSUPP)
@@ -901,10 +912,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
901static void f2fs_submit_discard_endio(struct bio *bio) 912static void f2fs_submit_discard_endio(struct bio *bio)
902{ 913{
903 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; 914 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
915 unsigned long flags;
904 916
905 dc->error = blk_status_to_errno(bio->bi_status); 917 dc->error = blk_status_to_errno(bio->bi_status);
906 dc->state = D_DONE; 918
907 complete_all(&dc->wait); 919 spin_lock_irqsave(&dc->lock, flags);
920 dc->bio_ref--;
921 if (!dc->bio_ref && dc->state == D_SUBMIT) {
922 dc->state = D_DONE;
923 complete_all(&dc->wait);
924 }
925 spin_unlock_irqrestore(&dc->lock, flags);
908 bio_put(bio); 926 bio_put(bio);
909} 927}
910 928
@@ -972,17 +990,25 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
972 } 990 }
973} 991}
974 992
975 993static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
994 struct block_device *bdev, block_t lstart,
995 block_t start, block_t len);
976/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ 996/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
977static void __submit_discard_cmd(struct f2fs_sb_info *sbi, 997static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
978 struct discard_policy *dpolicy, 998 struct discard_policy *dpolicy,
979 struct discard_cmd *dc) 999 struct discard_cmd *dc,
1000 unsigned int *issued)
980{ 1001{
1002 struct block_device *bdev = dc->bdev;
1003 struct request_queue *q = bdev_get_queue(bdev);
1004 unsigned int max_discard_blocks =
1005 SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
981 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1006 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
982 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? 1007 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
983 &(dcc->fstrim_list) : &(dcc->wait_list); 1008 &(dcc->fstrim_list) : &(dcc->wait_list);
984 struct bio *bio = NULL;
985 int flag = dpolicy->sync ? REQ_SYNC : 0; 1009 int flag = dpolicy->sync ? REQ_SYNC : 0;
1010 block_t lstart, start, len, total_len;
1011 int err = 0;
986 1012
987 if (dc->state != D_PREP) 1013 if (dc->state != D_PREP)
988 return; 1014 return;
@@ -990,30 +1016,81 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
990 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) 1016 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
991 return; 1017 return;
992 1018
993 trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len); 1019 trace_f2fs_issue_discard(bdev, dc->start, dc->len);
994 1020
995 dc->error = __blkdev_issue_discard(dc->bdev, 1021 lstart = dc->lstart;
996 SECTOR_FROM_BLOCK(dc->start), 1022 start = dc->start;
997 SECTOR_FROM_BLOCK(dc->len), 1023 len = dc->len;
998 GFP_NOFS, 0, &bio); 1024 total_len = len;
999 if (!dc->error) { 1025
1000 /* should keep before submission to avoid D_DONE right away */ 1026 dc->len = 0;
1001 dc->state = D_SUBMIT; 1027
1002 atomic_inc(&dcc->issued_discard); 1028 while (total_len && *issued < dpolicy->max_requests && !err) {
1003 atomic_inc(&dcc->issing_discard); 1029 struct bio *bio = NULL;
1004 if (bio) { 1030 unsigned long flags;
1031 bool last = true;
1032
1033 if (len > max_discard_blocks) {
1034 len = max_discard_blocks;
1035 last = false;
1036 }
1037
1038 (*issued)++;
1039 if (*issued == dpolicy->max_requests)
1040 last = true;
1041
1042 dc->len += len;
1043
1044 err = __blkdev_issue_discard(bdev,
1045 SECTOR_FROM_BLOCK(start),
1046 SECTOR_FROM_BLOCK(len),
1047 GFP_NOFS, 0, &bio);
1048 if (!err && bio) {
1049 /*
1050 * should keep before submission to avoid D_DONE
1051 * right away
1052 */
1053 spin_lock_irqsave(&dc->lock, flags);
1054 if (last)
1055 dc->state = D_SUBMIT;
1056 else
1057 dc->state = D_PARTIAL;
1058 dc->bio_ref++;
1059 spin_unlock_irqrestore(&dc->lock, flags);
1060
1061 atomic_inc(&dcc->issing_discard);
1062 dc->issuing++;
1063 list_move_tail(&dc->list, wait_list);
1064
1065 /* sanity check on discard range */
1066 __check_sit_bitmap(sbi, start, start + len);
1067
1005 bio->bi_private = dc; 1068 bio->bi_private = dc;
1006 bio->bi_end_io = f2fs_submit_discard_endio; 1069 bio->bi_end_io = f2fs_submit_discard_endio;
1007 bio->bi_opf |= flag; 1070 bio->bi_opf |= flag;
1008 submit_bio(bio); 1071 submit_bio(bio);
1009 list_move_tail(&dc->list, wait_list); 1072
1010 __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); 1073 atomic_inc(&dcc->issued_discard);
1011 1074
1012 f2fs_update_iostat(sbi, FS_DISCARD, 1); 1075 f2fs_update_iostat(sbi, FS_DISCARD, 1);
1076 } else {
1077 spin_lock_irqsave(&dc->lock, flags);
1078 if (dc->state == D_PARTIAL)
1079 dc->state = D_SUBMIT;
1080 spin_unlock_irqrestore(&dc->lock, flags);
1081
1082 __remove_discard_cmd(sbi, dc);
1083 err = -EIO;
1013 } 1084 }
1014 } else { 1085
1015 __remove_discard_cmd(sbi, dc); 1086 lstart += len;
1087 start += len;
1088 total_len -= len;
1089 len = total_len;
1016 } 1090 }
1091
1092 if (len)
1093 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1017} 1094}
1018 1095
1019static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, 1096static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
@@ -1094,10 +1171,11 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1094 struct discard_cmd *dc; 1171 struct discard_cmd *dc;
1095 struct discard_info di = {0}; 1172 struct discard_info di = {0};
1096 struct rb_node **insert_p = NULL, *insert_parent = NULL; 1173 struct rb_node **insert_p = NULL, *insert_parent = NULL;
1174 struct request_queue *q = bdev_get_queue(bdev);
1175 unsigned int max_discard_blocks =
1176 SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1097 block_t end = lstart + len; 1177 block_t end = lstart + len;
1098 1178
1099 mutex_lock(&dcc->cmd_lock);
1100
1101 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, 1179 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1102 NULL, lstart, 1180 NULL, lstart,
1103 (struct rb_entry **)&prev_dc, 1181 (struct rb_entry **)&prev_dc,
@@ -1137,7 +1215,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1137 1215
1138 if (prev_dc && prev_dc->state == D_PREP && 1216 if (prev_dc && prev_dc->state == D_PREP &&
1139 prev_dc->bdev == bdev && 1217 prev_dc->bdev == bdev &&
1140 __is_discard_back_mergeable(&di, &prev_dc->di)) { 1218 __is_discard_back_mergeable(&di, &prev_dc->di,
1219 max_discard_blocks)) {
1141 prev_dc->di.len += di.len; 1220 prev_dc->di.len += di.len;
1142 dcc->undiscard_blks += di.len; 1221 dcc->undiscard_blks += di.len;
1143 __relocate_discard_cmd(dcc, prev_dc); 1222 __relocate_discard_cmd(dcc, prev_dc);
@@ -1148,7 +1227,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1148 1227
1149 if (next_dc && next_dc->state == D_PREP && 1228 if (next_dc && next_dc->state == D_PREP &&
1150 next_dc->bdev == bdev && 1229 next_dc->bdev == bdev &&
1151 __is_discard_front_mergeable(&di, &next_dc->di)) { 1230 __is_discard_front_mergeable(&di, &next_dc->di,
1231 max_discard_blocks)) {
1152 next_dc->di.lstart = di.lstart; 1232 next_dc->di.lstart = di.lstart;
1153 next_dc->di.len += di.len; 1233 next_dc->di.len += di.len;
1154 next_dc->di.start = di.start; 1234 next_dc->di.start = di.start;
@@ -1171,8 +1251,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1171 node = rb_next(&prev_dc->rb_node); 1251 node = rb_next(&prev_dc->rb_node);
1172 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); 1252 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1173 } 1253 }
1174
1175 mutex_unlock(&dcc->cmd_lock);
1176} 1254}
1177 1255
1178static int __queue_discard_cmd(struct f2fs_sb_info *sbi, 1256static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
@@ -1187,7 +1265,9 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
1187 1265
1188 blkstart -= FDEV(devi).start_blk; 1266 blkstart -= FDEV(devi).start_blk;
1189 } 1267 }
1268 mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1190 __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); 1269 __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1270 mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1191 return 0; 1271 return 0;
1192} 1272}
1193 1273
@@ -1226,9 +1306,9 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1226 } 1306 }
1227 1307
1228 dcc->next_pos = dc->lstart + dc->len; 1308 dcc->next_pos = dc->lstart + dc->len;
1229 __submit_discard_cmd(sbi, dpolicy, dc); 1309 __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1230 1310
1231 if (++issued >= dpolicy->max_requests) 1311 if (issued >= dpolicy->max_requests)
1232 break; 1312 break;
1233next: 1313next:
1234 node = rb_next(&dc->rb_node); 1314 node = rb_next(&dc->rb_node);
@@ -1283,9 +1363,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1283 break; 1363 break;
1284 } 1364 }
1285 1365
1286 __submit_discard_cmd(sbi, dpolicy, dc); 1366 __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1287 1367
1288 if (++issued >= dpolicy->max_requests) 1368 if (issued >= dpolicy->max_requests)
1289 break; 1369 break;
1290 } 1370 }
1291 blk_finish_plug(&plug); 1371 blk_finish_plug(&plug);
@@ -2492,9 +2572,9 @@ next:
2492 goto skip; 2572 goto skip;
2493 } 2573 }
2494 2574
2495 __submit_discard_cmd(sbi, dpolicy, dc); 2575 __submit_discard_cmd(sbi, dpolicy, dc, &issued);
2496 2576
2497 if (++issued >= dpolicy->max_requests) { 2577 if (issued >= dpolicy->max_requests) {
2498 start = dc->lstart + dc->len; 2578 start = dc->lstart + dc->len;
2499 2579
2500 blk_finish_plug(&plug); 2580 blk_finish_plug(&plug);