author     Linus Torvalds <torvalds@linux-foundation.org>   2019-09-27 15:08:24 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2019-09-27 15:08:24 -0400
commit     738f531d877ac2b228b25354dfa4da6e79a2c369
tree       cea724f0bee623d6694a6cf434992727a8475864
parent     47db9b9a6eba4c5b0872220c8c8ff787a4b06ab0
parent     bda521624e75c665c407b3d9cece6e7a28178cd8
Merge tag 'for-5.4/io_uring-2019-09-27' of git://git.kernel.dk/linux-block
Pull more io_uring updates from Jens Axboe:
"Just two things in here:
- Improvement to the io_uring CQ ring wakeup for batched IO (me)
- Fix wrong comparison in poll handling (yangerkun)
I realize the first one is a little late in the game, but it felt
pointless to hold it off until the next release. Went through various
testing and reviews with Pavel and peterz"
* tag 'for-5.4/io_uring-2019-09-27' of git://git.kernel.dk/linux-block:
io_uring: make CQ ring wakeups be more efficient
io_uring: compare cached_cq_tail with cq.head in_io_uring_poll
 fs/io_uring.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 57 insertions(+), 11 deletions(-)
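The wakeup change targets applications that queue up a batch of requests and then ask the kernel to wait until a given number of completions are ready. A rough userspace sketch of that pattern using the raw io_uring_enter(2) syscall (ring setup and SQE preparation are omitted; wait_for_batch and its arguments are placeholder names, and headers new enough to define __NR_io_uring_enter are assumed):

/* Sketch only: submit a prepared batch and block until min_complete CQEs exist. */
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>

static int wait_for_batch(int ring_fd, unsigned to_submit, unsigned min_complete)
{
	/*
	 * IORING_ENTER_GETEVENTS makes the kernel wait until at least
	 * min_complete completions are in the CQ ring. With this patch,
	 * the "enough completions?" check runs in the completer's context,
	 * so the sleeping task is no longer scheduled just to find it
	 * still has too few CQEs and go back to sleep.
	 */
	return (int)syscall(__NR_io_uring_enter, ring_fd, to_submit,
			    min_complete, IORING_ENTER_GETEVENTS, NULL, 0);
}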
diff --git a/fs/io_uring.c b/fs/io_uring.c
index dd094b387cab..aa8ac557493c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2768,6 +2768,38 @@ out:
 	return submit;
 }
 
+struct io_wait_queue {
+	struct wait_queue_entry wq;
+	struct io_ring_ctx *ctx;
+	unsigned to_wait;
+	unsigned nr_timeouts;
+};
+
+static inline bool io_should_wake(struct io_wait_queue *iowq)
+{
+	struct io_ring_ctx *ctx = iowq->ctx;
+
+	/*
+	 * Wake up if we have enough events, or if a timeout occured since we
+	 * started waiting. For timeouts, we always want to return to userspace,
+	 * regardless of event count.
+	 */
+	return io_cqring_events(ctx->rings) >= iowq->to_wait ||
+			atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
+}
+
+static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
+			    int wake_flags, void *key)
+{
+	struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
+							wq);
+
+	if (!io_should_wake(iowq))
+		return -1;
+
+	return autoremove_wake_function(curr, mode, wake_flags, key);
+}
+
 /*
  * Wait until events become available, if we don't already have some. The
  * application must reap them itself, as they reside on the shared cq ring.
@@ -2775,8 +2807,16 @@ out:
 static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			  const sigset_t __user *sig, size_t sigsz)
 {
+	struct io_wait_queue iowq = {
+		.wq = {
+			.private	= current,
+			.func		= io_wake_function,
+			.entry		= LIST_HEAD_INIT(iowq.wq.entry),
+		},
+		.ctx		= ctx,
+		.to_wait	= min_events,
+	};
 	struct io_rings *rings = ctx->rings;
-	unsigned nr_timeouts;
 	int ret;
 
 	if (io_cqring_events(rings) >= min_events)
@@ -2795,15 +2835,21 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
-	nr_timeouts = atomic_read(&ctx->cq_timeouts);
-	/*
-	 * Return if we have enough events, or if a timeout occured since
-	 * we started waiting. For timeouts, we always want to return to
-	 * userspace.
-	 */
-	ret = wait_event_interruptible(ctx->wait,
-			io_cqring_events(rings) >= min_events ||
-			atomic_read(&ctx->cq_timeouts) != nr_timeouts);
+	ret = 0;
+	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+	do {
+		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
+						TASK_INTERRUPTIBLE);
+		if (io_should_wake(&iowq))
+			break;
+		schedule();
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+	} while (1);
+	finish_wait(&ctx->wait, &iowq.wq);
+
 	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
 	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
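That completes the wait-side rework. One helper it leans on, io_cqring_events(), does not appear in this diff; a sketch of what it computes against the shared CQ ring (reconstructed from context, so treat the exact body as approximate):

/*
 * Sketch of the helper io_should_wake() calls; the exact body in
 * fs/io_uring.c may differ. Pending completions are the CQEs the
 * kernel has published (cq.tail) minus those the application has
 * already reaped (cq.head).
 */
static unsigned io_cqring_events(struct io_rings *rings)
{
	/* order these ring reads after the CQE stores that preceded them */
	smp_rmb();
	return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
}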
@@ -3455,7 +3501,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head !=
 	    ctx->rings->sq_ring_entries)
 		mask |= EPOLLOUT | EPOLLWRNORM;
-	if (READ_ONCE(ctx->rings->sq.head) != ctx->cached_cq_tail)
+	if (READ_ONCE(ctx->rings->cq.head) != ctx->cached_cq_tail)
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	return mask;
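The poll fix makes io_uring_poll() report EPOLLIN based on the CQ head rather than the SQ head, so readiness tracks completions the application has not yet consumed. A rough userspace counterpart of that readiness check, poll then reap (poll_and_reap and the ring pointers below are placeholder names, not liburing API; the atomics use GCC/Clang builtins):

#include <poll.h>
#include <linux/io_uring.h>

/*
 * Sketch only: wait for completions with poll(2) on the ring fd, then
 * reap CQEs from the shared CQ ring. cq_head, cq_tail, cq_mask and
 * cqes are assumed to point into the mmap()ed CQ ring; error handling
 * and CQE processing are omitted.
 */
void poll_and_reap(int ring_fd, unsigned *cq_head, unsigned *cq_tail,
		   unsigned cq_mask, struct io_uring_cqe *cqes)
{
	struct pollfd pfd = { .fd = ring_fd, .events = POLLIN };

	/* POLLIN: the kernel has published CQEs we have not consumed yet */
	poll(&pfd, 1, -1);

	unsigned head = *cq_head;
	/* acquire pairs with the kernel's release store of the CQ tail */
	unsigned tail = __atomic_load_n(cq_tail, __ATOMIC_ACQUIRE);

	while (head != tail) {
		struct io_uring_cqe *cqe = &cqes[head & cq_mask];

		(void)cqe;	/* consume cqe->user_data / cqe->res here */
		head++;
	}
	/* hand the consumed entries back to the kernel */
	__atomic_store_n(cq_head, head, __ATOMIC_RELEASE);
}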