author		Jens Axboe <axboe@kernel.dk>	2019-09-10 11:15:04 -0400
committer	Jens Axboe <axboe@kernel.dk>	2019-09-10 11:49:35 -0400
commit		54a91f3bb9b96ed86bc12b2f7e06b3fce8e86503 (patch)
tree		c4d5a47fb9d06ec67ab341dc9490590f8c138a6a /fs/io_uring.c
parent		18d9be1a970c3704366df902b00871bea88d9f14 (diff)
io_uring: limit parallelism of buffered writes
All the popular filesystems need to grab the inode lock for buffered
writes. With io_uring punting buffered writes to async context, we
observe a lot of contention with all workers hammering this mutex.
For buffered writes, we generally don't need a lot of parallelism on
the submission side, as the flushing will take care of that for us.
Hence we don't need a deep queue on the write side, as long as we
can safely punt from the original submission context.
Add a workqueue with a concurrency limit of 2 that we can use for buffered writes.
This greatly improves the performance and efficiency of higher queue
depth buffered async writes with io_uring.
Reported-by: Andres Freund <andres@anarazel.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
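
The workload this helps is, roughly, high queue depth buffered writes submitted
through io_uring. Below is a minimal liburing sketch of such a submitter (not
part of the commit; queue depth, block size, and file name are arbitrary). With
no O_DIRECT on the file, each write is buffered and gets punted to the async
workqueue this patch touches.

/* Sketch only: high queue depth buffered writes via io_uring (liburing). */
#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define QD	64
#define BS	4096

int main(void)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	struct iovec iov[QD];
	int fd, i;

	/* No O_DIRECT, so these are buffered writes; without the patch all
	 * QD of them can hammer the inode lock from async workers. */
	fd = open("testfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0 || io_uring_queue_init(QD, &ring, 0) < 0)
		return 1;

	for (i = 0; i < QD; i++) {
		struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

		iov[i].iov_base = malloc(BS);
		iov[i].iov_len = BS;
		memset(iov[i].iov_base, 0, BS);
		io_uring_prep_writev(sqe, fd, &iov[i], 1, (off_t)i * BS);
	}
	io_uring_submit(&ring);

	for (i = 0; i < QD; i++) {
		if (io_uring_wait_cqe(&ring, &cqe) < 0)
			return 1;
		if (cqe->res < 0)
			fprintf(stderr, "write: %s\n", strerror(-cqe->res));
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}
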
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--	fs/io_uring.c	47
1 file changed, 39 insertions(+), 8 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 41840bf26d3b..03fcd974fd1d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -203,7 +203,7 @@ struct io_ring_ctx {
 	} ____cacheline_aligned_in_smp;
 
 	/* IO offload */
-	struct workqueue_struct	*sqo_wq;
+	struct workqueue_struct	*sqo_wq[2];
 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
@@ -446,7 +446,19 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
 static inline void io_queue_async_work(struct io_ring_ctx *ctx,
 				       struct io_kiocb *req)
 {
-	queue_work(ctx->sqo_wq, &req->work);
+	int rw;
+
+	switch (req->submit.sqe->opcode) {
+	case IORING_OP_WRITEV:
+	case IORING_OP_WRITE_FIXED:
+		rw = !(req->rw.ki_flags & IOCB_DIRECT);
+		break;
+	default:
+		rw = 0;
+		break;
+	}
+
+	queue_work(ctx->sqo_wq[rw], &req->work);
 }
 
 static void io_commit_cqring(struct io_ring_ctx *ctx)
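
Read outside the diff, the queue selection added above reduces to the helper
below. This is a commented restatement, not code from the patch; the helper
name is made up, but the fields are the ones the hunk uses, and index 1 is the
limited buffered-write queue set up later in the patch.

/*
 * Not part of the patch: a commented restatement of the routing added to
 * io_queue_async_work(). sqo_wq[0] is the wide queue sized off QD/CPUs,
 * sqo_wq[1] is the 2-deep queue reserved for buffered writes.
 */
static int io_async_wq_index(const struct io_kiocb *req)
{
	switch (req->submit.sqe->opcode) {
	case IORING_OP_WRITEV:
	case IORING_OP_WRITE_FIXED:
		/*
		 * IOCB_DIRECT set: O_DIRECT write, keep it on the wide queue
		 * (index 0). Clear: buffered write, send it to the limited
		 * queue (index 1) to avoid hammering the inode lock.
		 */
		return !(req->rw.ki_flags & IOCB_DIRECT);
	default:
		/* Reads and everything else stay on the wide queue. */
		return 0;
	}
}
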
@@ -2634,11 +2646,15 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 
 static void io_finish_async(struct io_ring_ctx *ctx)
 {
+	int i;
+
 	io_sq_thread_stop(ctx);
 
-	if (ctx->sqo_wq) {
-		destroy_workqueue(ctx->sqo_wq);
-		ctx->sqo_wq = NULL;
+	for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++) {
+		if (ctx->sqo_wq[i]) {
+			destroy_workqueue(ctx->sqo_wq[i]);
+			ctx->sqo_wq[i] = NULL;
+		}
 	}
 }
 
@@ -2846,16 +2862,31 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 	}
 
 	/* Do QD, or 2 * CPUS, whatever is smallest */
-	ctx->sqo_wq = alloc_workqueue("io_ring-wq", WQ_UNBOUND | WQ_FREEZABLE,
+	ctx->sqo_wq[0] = alloc_workqueue("io_ring-wq",
+			WQ_UNBOUND | WQ_FREEZABLE,
 			min(ctx->sq_entries - 1, 2 * num_online_cpus()));
-	if (!ctx->sqo_wq) {
+	if (!ctx->sqo_wq[0]) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/*
+	 * This is for buffered writes, where we want to limit the parallelism
+	 * due to file locking in file systems. As "normal" buffered writes
+	 * should parallelize on writeout quite nicely, limit us to having 2
+	 * pending. This avoids massive contention on the inode when doing
+	 * buffered async writes.
+	 */
+	ctx->sqo_wq[1] = alloc_workqueue("io_ring-write-wq",
+						WQ_UNBOUND | WQ_FREEZABLE, 2);
+	if (!ctx->sqo_wq[1]) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
 	return 0;
 err:
-	io_sq_thread_stop(ctx);
+	io_finish_async(ctx);
 	mmdrop(ctx->sqo_mm);
 	ctx->sqo_mm = NULL;
 	return ret;
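
The cap itself comes from alloc_workqueue()'s max_active argument, 2 for the
new io_ring-write-wq. A standalone module sketch of that mechanism, with
hypothetical names and not taken from the patch: eight work items queued on a
queue created with the same flags and limit, of which at most two execute at a
time (per node for WQ_UNBOUND).

/*
 * Hypothetical standalone sketch (not from the patch): shows how the
 * max_active argument of alloc_workqueue() caps concurrency, which is the
 * mechanism the buffered-write queue relies on.
 */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/atomic.h>

static struct workqueue_struct *limited_wq;
static struct work_struct items[8];
static atomic_t running = ATOMIC_INIT(0);

static void limited_fn(struct work_struct *work)
{
	/* With max_active == 2, at most two items are in here at once
	 * (per NUMA node for WQ_UNBOUND queues). */
	pr_info("limited-wq: %d active\n", atomic_inc_return(&running));
	msleep(100);
	atomic_dec(&running);
}

static int __init limited_init(void)
{
	int i;

	/* Same flags and limit as the io_ring-write-wq added by the patch. */
	limited_wq = alloc_workqueue("limited-wq", WQ_UNBOUND | WQ_FREEZABLE, 2);
	if (!limited_wq)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(items); i++) {
		INIT_WORK(&items[i], limited_fn);
		queue_work(limited_wq, &items[i]);
	}
	return 0;
}

static void __exit limited_exit(void)
{
	destroy_workqueue(limited_wq);	/* drains any still-queued items */
}

module_init(limited_init);
module_exit(limited_exit);
MODULE_LICENSE("GPL");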