path: root/fs/io_uring.c
author	Jens Axboe <axboe@kernel.dk>	2019-09-10 11:15:04 -0400
committer	Jens Axboe <axboe@kernel.dk>	2019-09-10 11:49:35 -0400
commit	54a91f3bb9b96ed86bc12b2f7e06b3fce8e86503 (patch)
tree	c4d5a47fb9d06ec67ab341dc9490590f8c138a6a /fs/io_uring.c
parent	18d9be1a970c3704366df902b00871bea88d9f14 (diff)
io_uring: limit parallelism of buffered writes
All the popular filesystems need to grab the inode lock for buffered
writes. With io_uring punting buffered writes to async context, we
observe a lot of contention with all workers hamming this mutex.

For buffered writes, we generally don't need a lot of parallelism on
the submission side, as the flushing will take care of that for us.
Hence we don't need a deep queue on the write side, as long as we can
safely punt from the original submission context.

Add a workqueue with a limit of 2 that we can use for buffered writes.
This greatly improves the performance and efficiency of higher queue
depth buffered async writes with io_uring.

Reported-by: Andres Freund <andres@anarazel.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
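The mechanism leans entirely on the existing workqueue API: alloc_workqueue()
takes a max_active argument, and passing 2 caps how many queued work items
may execute concurrently. A minimal, self-contained module sketch of that
bound (module and queue names here are hypothetical, not part of the commit):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *write_wq;

static int __init bounded_wq_init(void)
{
	/* max_active = 2: at most two work items run at once, mirroring
	 * the cap the commit applies to buffered writes. */
	write_wq = alloc_workqueue("example-write-wq",
				   WQ_UNBOUND | WQ_FREEZABLE, 2);
	return write_wq ? 0 : -ENOMEM;
}

static void __exit bounded_wq_exit(void)
{
	destroy_workqueue(write_wq);
}

module_init(bounded_wq_init);
module_exit(bounded_wq_exit);
MODULE_LICENSE("GPL");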
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--	fs/io_uring.c | 47 +++++++++++++++++++++++++++++++++--------
1 file changed, 39 insertions(+), 8 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 41840bf26d3b..03fcd974fd1d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -203,7 +203,7 @@ struct io_ring_ctx {
 	} ____cacheline_aligned_in_smp;
 
 	/* IO offload */
-	struct workqueue_struct	*sqo_wq;
+	struct workqueue_struct	*sqo_wq[2];
 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
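The lone workqueue pointer becomes a two-slot array, indexed below with a
bare 0 or 1. Purely for illustration, the slots could be labeled like this
(hypothetical names, not used by the commit):

/* hypothetical labels for the ctx->sqo_wq[] slots */
enum {
	WQ_GENERAL   = 0,	/* reads, direct I/O, everything else */
	WQ_BUF_WRITE = 1,	/* buffered writes, capped at 2 workers */
};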
@@ -446,7 +446,19 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
 static inline void io_queue_async_work(struct io_ring_ctx *ctx,
 				       struct io_kiocb *req)
 {
-	queue_work(ctx->sqo_wq, &req->work);
+	int rw;
+
+	switch (req->submit.sqe->opcode) {
+	case IORING_OP_WRITEV:
+	case IORING_OP_WRITE_FIXED:
+		rw = !(req->rw.ki_flags & IOCB_DIRECT);
+		break;
+	default:
+		rw = 0;
+		break;
+	}
+
+	queue_work(ctx->sqo_wq[rw], &req->work);
 }
 
 static void io_commit_cqring(struct io_ring_ctx *ctx)
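This routing is the heart of the patch: only IORING_OP_WRITEV and
IORING_OP_WRITE_FIXED requests without IOCB_DIRECT evaluate to index 1 and
land on the bounded queue; direct writes and everything else stay on the
general queue, unchanged. A userspace sketch of the same predicate, using
stand-in constants rather than the kernel's definitions:

#include <stdio.h>

/* Illustrative stand-ins for the kernel constants used in the hunk. */
enum { OP_READV, OP_WRITEV, OP_WRITE_FIXED };
#define IOCB_DIRECT (1 << 0)

/* Mirrors the switch in io_queue_async_work(): returns the queue index. */
static int pick_queue(int opcode, unsigned int ki_flags)
{
	switch (opcode) {
	case OP_WRITEV:
	case OP_WRITE_FIXED:
		/* Buffered writes (no O_DIRECT) go to the bounded queue. */
		return !(ki_flags & IOCB_DIRECT);
	default:
		return 0;
	}
}

int main(void)
{
	printf("%d\n", pick_queue(OP_WRITEV, 0));           /* 1: buffered write */
	printf("%d\n", pick_queue(OP_WRITEV, IOCB_DIRECT)); /* 0: direct write */
	printf("%d\n", pick_queue(OP_READV, 0));            /* 0: not a write */
	return 0;
}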
@@ -2634,11 +2646,15 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 
 static void io_finish_async(struct io_ring_ctx *ctx)
 {
+	int i;
+
 	io_sq_thread_stop(ctx);
 
-	if (ctx->sqo_wq) {
-		destroy_workqueue(ctx->sqo_wq);
-		ctx->sqo_wq = NULL;
+	for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++) {
+		if (ctx->sqo_wq[i]) {
+			destroy_workqueue(ctx->sqo_wq[i]);
+			ctx->sqo_wq[i] = NULL;
+		}
 	}
 }
 
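Teardown now walks the whole array and tolerates slots that were never
allocated; that per-slot NULL check is what lets the error path in the next
hunk call io_finish_async() instead of just io_sq_thread_stop(). The same
partial-cleanup pattern in a standalone userspace sketch:

#include <stdlib.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
	/* Simulate partial setup: slot 0 allocated, slot 1 failed. */
	void *wq[2] = { malloc(16), NULL };
	size_t i;

	for (i = 0; i < ARRAY_SIZE(wq); i++) {
		if (wq[i]) {		/* skip slots that never came up */
			free(wq[i]);
			wq[i] = NULL;
		}
	}
	return 0;
}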
@@ -2846,16 +2862,31 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 	}
 
 	/* Do QD, or 2 * CPUS, whatever is smallest */
-	ctx->sqo_wq = alloc_workqueue("io_ring-wq", WQ_UNBOUND | WQ_FREEZABLE,
+	ctx->sqo_wq[0] = alloc_workqueue("io_ring-wq",
+			WQ_UNBOUND | WQ_FREEZABLE,
 			min(ctx->sq_entries - 1, 2 * num_online_cpus()));
-	if (!ctx->sqo_wq) {
+	if (!ctx->sqo_wq[0]) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/*
+	 * This is for buffered writes, where we want to limit the parallelism
+	 * due to file locking in file systems. As "normal" buffered writes
+	 * should parallelize on writeout quite nicely, limit us to having 2
+	 * pending. This avoids massive contention on the inode when doing
+	 * buffered async writes.
+	 */
+	ctx->sqo_wq[1] = alloc_workqueue("io_ring-write-wq",
+						WQ_UNBOUND | WQ_FREEZABLE, 2);
+	if (!ctx->sqo_wq[1]) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
 	return 0;
err:
-	io_sq_thread_stop(ctx);
+	io_finish_async(ctx);
 	mmdrop(ctx->sqo_mm);
 	ctx->sqo_mm = NULL;
 	return ret;
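For context, the workload this commit targets can be reproduced with a short
liburing program: buffered (non-O_DIRECT) writes submitted at queue depth,
which the kernel may punt to async context. A minimal sketch, assuming
liburing is installed; the file name, queue depth, and block size are
arbitrary choices, not anything the commit prescribes:

/* cc -o qd_write qd_write.c -luring */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <liburing.h>

#define QD 32			/* submission queue depth */
#define BS 4096			/* bytes per write */

int main(void)
{
	struct io_uring ring;
	struct iovec iov[QD];
	int fd, i;

	/* Buffered writes: no O_DIRECT, so any async punting lands on
	 * the bounded write workqueue added by this patch. */
	fd = open("testfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0 || io_uring_queue_init(QD, &ring, 0) < 0)
		return 1;

	for (i = 0; i < QD; i++) {
		struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

		iov[i].iov_base = calloc(1, BS);
		iov[i].iov_len = BS;
		io_uring_prep_writev(sqe, fd, &iov[i], 1, (off_t)i * BS);
	}
	io_uring_submit(&ring);

	for (i = 0; i < QD; i++) {
		struct io_uring_cqe *cqe;

		if (io_uring_wait_cqe(&ring, &cqe) < 0)
			return 1;
		if (cqe->res < 0)
			fprintf(stderr, "write: %s\n", strerror(-cqe->res));
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}

With the patch applied, however many of these writes get punted to async
context, at most two io_ring-write-wq workers contend on the inode lock at
any one time.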