summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Johannes Weiner <hannes@cmpxchg.org> 2019-08-08 15:03:00 -0400
committer Jens Axboe <axboe@kernel.dk> 2019-08-14 10:50:01 -0400
commit b8e24a9300b0836a9d39f6b20746766b3b81f1bd (patch)
tree 2b992e084f3e88eca44d34c427745fed595e6149
parent 73d9c8d4c0017e21e1ff519474ceb1450484dc9a (diff)
block: annotate refault stalls from IO submission

psi tracks the time tasks wait for refaulting pages to become
uptodate, but it does not track the time spent submitting the IO. The
submission part can be significant if backing storage is contended or
when cgroup throttling (io.latency) is in effect - a lot of time is
spent in submit_bio(). In that case, we underreport memory pressure.

Annotate submit_bio() to account submission time as memory stall when
the bio is reading userspace workingset pages.

Tested-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- block/bio.c 3
-rw-r--r-- block/blk-core.c 23
-rw-r--r-- include/linux/blk_types.h 1
3 files changed, 26 insertions, 1 deletion
diff --git a/block/bio.c b/block/bio.c
index 24a496f5d2e2..54769659a434 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -806,6 +806,9 @@ void __bio_add_page(struct bio *bio, struct page *page,
806 806
807 bio->bi_iter.bi_size += len; 807 bio->bi_iter.bi_size += len;
808 bio->bi_vcnt++; 808 bio->bi_vcnt++;
809
810 if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
811 bio_set_flag(bio, BIO_WORKINGSET);
809} 812}
810EXPORT_SYMBOL_GPL(__bio_add_page); 813EXPORT_SYMBOL_GPL(__bio_add_page);
811 814
diff --git a/block/blk-core.c b/block/blk-core.c
index 919629ce4015..834aea04718f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -36,6 +36,7 @@
36#include <linux/blk-cgroup.h> 36#include <linux/blk-cgroup.h>
37#include <linux/debugfs.h> 37#include <linux/debugfs.h>
38#include <linux/bpf.h> 38#include <linux/bpf.h>
39#include <linux/psi.h>
39 40
40#define CREATE_TRACE_POINTS 41#define CREATE_TRACE_POINTS
41#include <trace/events/block.h> 42#include <trace/events/block.h>
@@ -1134,6 +1135,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
1134 */ 1135 */
1135blk_qc_t submit_bio(struct bio *bio) 1136blk_qc_t submit_bio(struct bio *bio)
1136{ 1137{
1138 bool workingset_read = false;
1139 unsigned long pflags;
1140 blk_qc_t ret;
1141
1137 if (blkcg_punt_bio_submit(bio)) 1142 if (blkcg_punt_bio_submit(bio))
1138 return BLK_QC_T_NONE; 1143 return BLK_QC_T_NONE;
1139 1144
@@ -1152,6 +1157,8 @@ blk_qc_t submit_bio(struct bio *bio)
1152 if (op_is_write(bio_op(bio))) { 1157 if (op_is_write(bio_op(bio))) {
1153 count_vm_events(PGPGOUT, count); 1158 count_vm_events(PGPGOUT, count);
1154 } else { 1159 } else {
1160 if (bio_flagged(bio, BIO_WORKINGSET))
1161 workingset_read = true;
1155 task_io_account_read(bio->bi_iter.bi_size); 1162 task_io_account_read(bio->bi_iter.bi_size);
1156 count_vm_events(PGPGIN, count); 1163 count_vm_events(PGPGIN, count);
1157 } 1164 }
@@ -1166,7 +1173,21 @@ blk_qc_t submit_bio(struct bio *bio)
1166 } 1173 }
1167 } 1174 }
1168 1175
1169 return generic_make_request(bio); 1176 /*
1177 * If we're reading data that is part of the userspace
1178 * workingset, count submission time as memory stall. When the
1179 * device is congested, or the submitting cgroup IO-throttled,
1180 * submission can be a significant part of overall IO time.
1181 */
1182 if (workingset_read)
1183 psi_memstall_enter(&pflags);
1184
1185 ret = generic_make_request(bio);
1186
1187 if (workingset_read)
1188 psi_memstall_leave(&pflags);
1189
1190 return ret;
1170} 1191}
1171EXPORT_SYMBOL(submit_bio); 1192EXPORT_SYMBOL(submit_bio);
1172 1193
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d6ce7b3ec8b1..5a1118d4ef7e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -209,6 +209,7 @@ enum {
209 BIO_BOUNCED, /* bio is a bounce bio */ 209 BIO_BOUNCED, /* bio is a bounce bio */
210 BIO_USER_MAPPED, /* contains user pages */ 210 BIO_USER_MAPPED, /* contains user pages */
211 BIO_NULL_MAPPED, /* contains invalid user pages */ 211 BIO_NULL_MAPPED, /* contains invalid user pages */
212 BIO_WORKINGSET, /* contains userspace workingset pages */
212 BIO_QUIET, /* Make BIO Quiet */ 213 BIO_QUIET, /* Make BIO Quiet */
213 BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ 214 BIO_CHAIN, /* chained bio, ->bi_remaining in effect */
214 BIO_REFFED, /* bio has elevated ->bi_cnt */ 215 BIO_REFFED, /* bio has elevated ->bi_cnt */