author		Jens Axboe <axboe@kernel.dk>	2019-09-10 11:15:04 -0400
committer	Jens Axboe <axboe@kernel.dk>	2019-09-10 11:49:35 -0400
commit		54a91f3bb9b96ed86bc12b2f7e06b3fce8e86503 (patch)
tree		c4d5a47fb9d06ec67ab341dc9490590f8c138a6a /fs/io_uring.c
parent		18d9be1a970c3704366df902b00871bea88d9f14 (diff)
io_uring: limit parallelism of buffered writes
All the popular filesystems need to grab the inode lock for buffered
writes. With io_uring punting buffered writes to async context, we
observe a lot of contention with all workers hammering this mutex.
For buffered writes, we generally don't need a lot of parallelism on
the submission side, as the flushing will take care of that for us.
Hence we don't need a deep queue on the write side, as long as we
can safely punt from the original submission context.
Add a workqueue with a concurrency limit of 2 that we can use for buffered writes.
This greatly improves the performance and efficiency of higher queue
depth buffered async writes with io_uring.
Reported-by: Andres Freund <andres@anarazel.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
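
The workload this helps is, roughly, high queue depth buffered writes submitted
through io_uring. Below is a minimal liburing sketch of such a submitter (not
part of the commit; queue depth, block size, and file name are arbitrary). With
no O_DIRECT on the file, each write is buffered and gets punted to the async
workqueue this patch touches.

/* Sketch only: high queue depth buffered writes via io_uring (liburing). */
#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define QD	64
#define BS	4096

int main(void)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	struct iovec iov[QD];
	int fd, i;

	/* No O_DIRECT, so these are buffered writes; without the patch all
	 * QD of them can hammer the inode lock from async workers. */
	fd = open("testfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0 || io_uring_queue_init(QD, &ring, 0) < 0)
		return 1;

	for (i = 0; i < QD; i++) {
		struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

		iov[i].iov_base = malloc(BS);
		iov[i].iov_len = BS;
		memset(iov[i].iov_base, 0, BS);
		io_uring_prep_writev(sqe, fd, &iov[i], 1, (off_t)i * BS);
	}
	io_uring_submit(&ring);

	for (i = 0; i < QD; i++) {
		if (io_uring_wait_cqe(&ring, &cqe) < 0)
			return 1;
		if (cqe->res < 0)
			fprintf(stderr, "write: %s\n", strerror(-cqe->res));
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}
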
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--	fs/io_uring.c	47
1 file changed, 39 insertions(+), 8 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 41840bf26d3b..03fcd974fd1d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -203,7 +203,7 @@ struct io_ring_ctx {
 	} ____cacheline_aligned_in_smp;
 
 	/* IO offload */
-	struct workqueue_struct	*sqo_wq;
+	struct workqueue_struct	*sqo_wq[2];
 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
@@ -446,7 +446,19 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
 static inline void io_queue_async_work(struct io_ring_ctx *ctx,
 				       struct io_kiocb *req)
 {
-	queue_work(ctx->sqo_wq, &req->work);
+	int rw;
+
+	switch (req->submit.sqe->opcode) {
+	case IORING_OP_WRITEV:
+	case IORING_OP_WRITE_FIXED:
+		rw = !(req->rw.ki_flags & IOCB_DIRECT);
+		break;
+	default:
+		rw = 0;
+		break;
+	}
+
+	queue_work(ctx->sqo_wq[rw], &req->work);
 }
 
 static void io_commit_cqring(struct io_ring_ctx *ctx)
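
Read outside the diff, the queue selection added above reduces to the helper
below. This is a commented restatement, not code from the patch; the helper
name is made up, but the fields are the ones the hunk uses, and index 1 is the
limited buffered-write queue set up later in the patch.

/*
 * Not part of the patch: a commented restatement of the routing added to
 * io_queue_async_work(). sqo_wq[0] is the wide queue sized off QD/CPUs,
 * sqo_wq[1] is the 2-deep queue reserved for buffered writes.
 */
static int io_async_wq_index(const struct io_kiocb *req)
{
	switch (req->submit.sqe->opcode) {
	case IORING_OP_WRITEV:
	case IORING_OP_WRITE_FIXED:
		/*
		 * IOCB_DIRECT set: O_DIRECT write, keep it on the wide queue
		 * (index 0). Clear: buffered write, send it to the limited
		 * queue (index 1) to avoid hammering the inode lock.
		 */
		return !(req->rw.ki_flags & IOCB_DIRECT);
	default:
		/* Reads and everything else stay on the wide queue. */
		return 0;
	}
}
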
@@ -2634,11 +2646,15 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 
 static void io_finish_async(struct io_ring_ctx *ctx)
 {
+	int i;
+
 	io_sq_thread_stop(ctx);
 
-	if (ctx->sqo_wq) {
-		destroy_workqueue(ctx->sqo_wq);
-		ctx->sqo_wq = NULL;
+	for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++) {
+		if (ctx->sqo_wq[i]) {
+			destroy_workqueue(ctx->sqo_wq[i]);
+			ctx->sqo_wq[i] = NULL;
+		}
 	}
 }
 
@@ -2846,16 +2862,31 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 	}
 
 	/* Do QD, or 2 * CPUS, whatever is smallest */
-	ctx->sqo_wq = alloc_workqueue("io_ring-wq", WQ_UNBOUND | WQ_FREEZABLE,
+	ctx->sqo_wq[0] = alloc_workqueue("io_ring-wq",
+			WQ_UNBOUND | WQ_FREEZABLE,
 			min(ctx->sq_entries - 1, 2 * num_online_cpus()));
-	if (!ctx->sqo_wq) {
+	if (!ctx->sqo_wq[0]) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/*
+	 * This is for buffered writes, where we want to limit the parallelism
+	 * due to file locking in file systems. As "normal" buffered writes
+	 * should parallelize on writeout quite nicely, limit us to having 2
+	 * pending. This avoids massive contention on the inode when doing
+	 * buffered async writes.
+	 */
+	ctx->sqo_wq[1] = alloc_workqueue("io_ring-write-wq",
+						WQ_UNBOUND | WQ_FREEZABLE, 2);
+	if (!ctx->sqo_wq[1]) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
 	return 0;
 err:
-	io_sq_thread_stop(ctx);
+	io_finish_async(ctx);
 	mmdrop(ctx->sqo_mm);
 	ctx->sqo_mm = NULL;
 	return ret;
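
The cap itself comes from alloc_workqueue()'s max_active argument, 2 for the
new io_ring-write-wq. A standalone module sketch of that mechanism, with
hypothetical names and not taken from the patch: eight work items queued on a
queue created with the same flags and limit, of which at most two execute at a
time (per node for WQ_UNBOUND).

/*
 * Hypothetical standalone sketch (not from the patch): shows how the
 * max_active argument of alloc_workqueue() caps concurrency, which is the
 * mechanism the buffered-write queue relies on.
 */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/atomic.h>

static struct workqueue_struct *limited_wq;
static struct work_struct items[8];
static atomic_t running = ATOMIC_INIT(0);

static void limited_fn(struct work_struct *work)
{
	/* With max_active == 2, at most two items are in here at once
	 * (per NUMA node for WQ_UNBOUND queues). */
	pr_info("limited-wq: %d active\n", atomic_inc_return(&running));
	msleep(100);
	atomic_dec(&running);
}

static int __init limited_init(void)
{
	int i;

	/* Same flags and limit as the io_ring-write-wq added by the patch. */
	limited_wq = alloc_workqueue("limited-wq", WQ_UNBOUND | WQ_FREEZABLE, 2);
	if (!limited_wq)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(items); i++) {
		INIT_WORK(&items[i], limited_fn);
		queue_work(limited_wq, &items[i]);
	}
	return 0;
}

static void __exit limited_exit(void)
{
	destroy_workqueue(limited_wq);	/* drains any still-queued items */
}

module_init(limited_init);
module_exit(limited_exit);
MODULE_LICENSE("GPL");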