io_uring: consider the overflow of sequence for timeout req

Now we recalculate the sequence of timeout with 'req->sequence = ctx->cached_sq_head + count - 1', judge the right place to insert for timeout_list by compare the number of request we still expected for completion. But we have not consider about the situation of overflow: 1. ctx->cached_sq_head + count - 1 may overflow. And a bigger count for the new timeout req can have a small req->sequence. 2. cached_sq_head of now may overflow compare with before req. And it will lead the timeout req with small req->sequence. This overflow will lead to the misorder of timeout_list, which can lead to the wrong order of the completion of timeout_list. Fix it by reuse req->submit.sequence to store the count, and change the logic of inserting sort in io_timeout. Signed-off-by: yangerkun <yangerkun@huawei.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: yangerkun <yangerkun@huawei.com> 2019-10-15 09:59:29 -0400
committer: Jens Axboe <axboe@kernel.dk> 2019-10-15 10:55:50 -0400
commit: 5da0fb1ab34ccfe6d49210b4f5a739c59fcbf25e (patch)
tree: c9eaf517afbc8c7dc8e2f9633ec17d6f0416e647 /fs
parent: 7a7c5e715e722c86d602c56a09e77f000364e263 (diff)
1 files changed, 21 insertions, 6 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 38d274fc0f25..d2cb277da2f4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1884,7 +1884,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
 static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-        unsigned count, req_dist, tail_index;
+        unsigned count;
        struct io_ring_ctx *ctx = req->ctx;
        struct list_head *entry;
        struct timespec64 ts;
@@ -1907,21 +1907,36 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                count = 1;
        req->sequence = ctx->cached_sq_head + count - 1;
+        /* reuse it to store the count */
+        req->submit.sequence = count;
        req->flags |= REQ_F_TIMEOUT;
        /*
         * Insertion sort, ensuring the first entry in the list is always
         * the one we need first.
         */
-        tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped;
-        req_dist = req->sequence - tail_index;
        spin_lock_irq(&ctx->completion_lock);
        list_for_each_prev(entry, &ctx->timeout_list) {
                struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
-                unsigned dist;
+                unsigned nxt_sq_head;
+                long long tmp, tmp_nxt;
-                dist = nxt->sequence - tail_index;
+                /*
-                if (req_dist >= dist)
+                 * Since cached_sq_head + count - 1 can overflow, use type long
+                 * long to store it.
+                 */
+                tmp = (long long)ctx->cached_sq_head + count - 1;
+                nxt_sq_head = nxt->sequence - nxt->submit.sequence + 1;
+                tmp_nxt = (long long)nxt_sq_head + nxt->submit.sequence - 1;
+                /*
+                 * cached_sq_head may overflow, and it will never overflow twice
+                 * once there is some timeout req still be valid.
+                 */
+                if (ctx->cached_sq_head < nxt_sq_head)
+                        tmp_nxt += UINT_MAX;
+                if (tmp >= tmp_nxt)
                        break;
        }
        list_add(&req->list, entry);
author	yangerkun <yangerkun@huawei.com>	2019-10-15 09:59:29 -0400
committer	Jens Axboe <axboe@kernel.dk>	2019-10-15 10:55:50 -0400
commit	5da0fb1ab34ccfe6d49210b4f5a739c59fcbf25e (patch)
tree	c9eaf517afbc8c7dc8e2f9633ec17d6f0416e647 /fs
parent	7a7c5e715e722c86d602c56a09e77f000364e263 (diff)