author     Linus Torvalds <torvalds@linux-foundation.org>   2019-09-27 15:08:24 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2019-09-27 15:08:24 -0400
commit     738f531d877ac2b228b25354dfa4da6e79a2c369
tree       cea724f0bee623d6694a6cf434992727a8475864
parent     47db9b9a6eba4c5b0872220c8c8ff787a4b06ab0
parent     bda521624e75c665c407b3d9cece6e7a28178cd8
Merge tag 'for-5.4/io_uring-2019-09-27' of git://git.kernel.dk/linux-block
Pull more io_uring updates from Jens Axboe:
"Just two things in here:
- Improvement to the io_uring CQ ring wakeup for batched IO (me)
- Fix wrong comparison in poll handling (yangerkun)
I realize the first one is a little late in the game, but it felt
pointless to hold it off until the next release. Went through various
testing and reviews with Pavel and peterz"
* tag 'for-5.4/io_uring-2019-09-27' of git://git.kernel.dk/linux-block:
io_uring: make CQ ring wakeups be more efficient
io_uring: compare cached_cq_tail with cq.head in_io_uring_poll
 fs/io_uring.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 57 insertions(+), 11 deletions(-)
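The wakeup change targets applications that queue up a batch of requests and then ask the kernel to wait until a given number of completions are ready. A rough userspace sketch of that pattern using the raw io_uring_enter(2) syscall (ring setup and SQE preparation are omitted; wait_for_batch and its arguments are placeholder names, and headers new enough to define __NR_io_uring_enter are assumed):

/* Sketch only: submit a prepared batch and block until min_complete CQEs exist. */
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>

static int wait_for_batch(int ring_fd, unsigned to_submit, unsigned min_complete)
{
	/*
	 * IORING_ENTER_GETEVENTS makes the kernel wait until at least
	 * min_complete completions are in the CQ ring. With this patch,
	 * the "enough completions?" check runs in the completer's context,
	 * so the sleeping task is no longer scheduled just to find it
	 * still has too few CQEs and go back to sleep.
	 */
	return (int)syscall(__NR_io_uring_enter, ring_fd, to_submit,
			    min_complete, IORING_ENTER_GETEVENTS, NULL, 0);
}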
diff --git a/fs/io_uring.c b/fs/io_uring.c
index dd094b387cab..aa8ac557493c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2768,6 +2768,38 @@ out:
 	return submit;
 }
 
+struct io_wait_queue {
+	struct wait_queue_entry wq;
+	struct io_ring_ctx *ctx;
+	unsigned to_wait;
+	unsigned nr_timeouts;
+};
+
+static inline bool io_should_wake(struct io_wait_queue *iowq)
+{
+	struct io_ring_ctx *ctx = iowq->ctx;
+
+	/*
+	 * Wake up if we have enough events, or if a timeout occured since we
+	 * started waiting. For timeouts, we always want to return to userspace,
+	 * regardless of event count.
+	 */
+	return io_cqring_events(ctx->rings) >= iowq->to_wait ||
+			atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
+}
+
+static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
+			    int wake_flags, void *key)
+{
+	struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
+							wq);
+
+	if (!io_should_wake(iowq))
+		return -1;
+
+	return autoremove_wake_function(curr, mode, wake_flags, key);
+}
+
 /*
  * Wait until events become available, if we don't already have some. The
  * application must reap them itself, as they reside on the shared cq ring.
@@ -2775,8 +2807,16 @@ out:
 static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			  const sigset_t __user *sig, size_t sigsz)
 {
+	struct io_wait_queue iowq = {
+		.wq = {
+			.private	= current,
+			.func		= io_wake_function,
+			.entry		= LIST_HEAD_INIT(iowq.wq.entry),
+		},
+		.ctx		= ctx,
+		.to_wait	= min_events,
+	};
 	struct io_rings *rings = ctx->rings;
-	unsigned nr_timeouts;
 	int ret;
 
 	if (io_cqring_events(rings) >= min_events)
@@ -2795,15 +2835,21 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
-	nr_timeouts = atomic_read(&ctx->cq_timeouts);
-	/*
-	 * Return if we have enough events, or if a timeout occured since
-	 * we started waiting. For timeouts, we always want to return to
-	 * userspace.
-	 */
-	ret = wait_event_interruptible(ctx->wait,
-			io_cqring_events(rings) >= min_events ||
-			atomic_read(&ctx->cq_timeouts) != nr_timeouts);
+	ret = 0;
+	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+	do {
+		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
+						TASK_INTERRUPTIBLE);
+		if (io_should_wake(&iowq))
+			break;
+		schedule();
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+	} while (1);
+	finish_wait(&ctx->wait, &iowq.wq);
+
 	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
 	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
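That completes the wait-side rework. One helper it leans on, io_cqring_events(), does not appear in this diff; a sketch of what it computes against the shared CQ ring (reconstructed from context, so treat the exact body as approximate):

/*
 * Sketch of the helper io_should_wake() calls; the exact body in
 * fs/io_uring.c may differ. Pending completions are the CQEs the
 * kernel has published (cq.tail) minus those the application has
 * already reaped (cq.head).
 */
static unsigned io_cqring_events(struct io_rings *rings)
{
	/* order these ring reads after the CQE stores that preceded them */
	smp_rmb();
	return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
}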
@@ -3455,7 +3501,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head !=
 	    ctx->rings->sq_ring_entries)
 		mask |= EPOLLOUT | EPOLLWRNORM;
-	if (READ_ONCE(ctx->rings->sq.head) != ctx->cached_cq_tail)
+	if (READ_ONCE(ctx->rings->cq.head) != ctx->cached_cq_tail)
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	return mask;
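The poll fix makes io_uring_poll() report EPOLLIN based on the CQ head rather than the SQ head, so readiness tracks completions the application has not yet consumed. A rough userspace counterpart of that readiness check, poll then reap (poll_and_reap and the ring pointers below are placeholder names, not liburing API; the atomics use GCC/Clang builtins):

#include <poll.h>
#include <linux/io_uring.h>

/*
 * Sketch only: wait for completions with poll(2) on the ring fd, then
 * reap CQEs from the shared CQ ring. cq_head, cq_tail, cq_mask and
 * cqes are assumed to point into the mmap()ed CQ ring; error handling
 * and CQE processing are omitted.
 */
void poll_and_reap(int ring_fd, unsigned *cq_head, unsigned *cq_tail,
		   unsigned cq_mask, struct io_uring_cqe *cqes)
{
	struct pollfd pfd = { .fd = ring_fd, .events = POLLIN };

	/* POLLIN: the kernel has published CQEs we have not consumed yet */
	poll(&pfd, 1, -1);

	unsigned head = *cq_head;
	/* acquire pairs with the kernel's release store of the CQ tail */
	unsigned tail = __atomic_load_n(cq_tail, __ATOMIC_ACQUIRE);

	while (head != tail) {
		struct io_uring_cqe *cqe = &cqes[head & cq_mask];

		(void)cqe;	/* consume cqe->user_data / cqe->res here */
		head++;
	}
	/* hand the consumed entries back to the kernel */
	__atomic_store_n(cq_head, head, __ATOMIC_RELEASE);
}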