| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-16 22:10:37 -0400 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-16 22:10:37 -0400 |
| commit | a6a4b66bd8f41922c543f7a820c66ed59c25995e (patch) | |
| tree | 1ab2c591cb14eac5b28e5de19d9818e566e79c44 | |
| parent | 1718de78e6235c04ecb7f87a6875fdf90aafe382 (diff) | |
| parent | fdb288a679cdf6a71f3c1ae6f348ba4dae742681 (diff) | |
Merge tag 'for-linus-20190516' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:
"A small set of fixes for io_uring.
This contains:
- smp_rmb() cleanup for io_cqring_events() (Jackie)
- io_cqring_wait() simplification (Jackie)
- removal of dead 'ev_flags' passing (me)
- SQ poll CPU affinity verification fix (me)
- SQ poll wait fix (Roman)
- SQE command prep cleanup and fix (Stefan)"
* tag 'for-linus-20190516' of git://git.kernel.dk/linux-block:
io_uring: use wait_event_interruptible for cq_wait conditional wait
io_uring: adjust smp_rmb inside io_cqring_events
io_uring: fix infinite wait in khread_park() on io_finish_async()
io_uring: remove 'ev_flags' argument
io_uring: fix failure to verify SQ_AFF cpu
io_uring: fix race condition reading SQE data
| -rw-r--r-- | fs/io_uring.c | 88 |
1 file changed, 31 insertions(+), 57 deletions(-)
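The io_cqring_wait() change in this pull replaces an open-coded prepare_to_wait()/schedule() loop with wait_event_interruptible(), which re-evaluates its condition on every wakeup and returns -ERESTARTSYS when a signal is pending; the patch maps that to -EINTR for userspace. A minimal kernel-style sketch of the pattern (the demo_* names are illustrative, not the actual io_uring code):

```c
/*
 * Kernel-style sketch of the wait pattern adopted by io_cqring_wait();
 * demo_* names are illustrative and not part of the io_uring patch.
 */
#include <linux/wait.h>
#include <linux/errno.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wait);
static unsigned int demo_events;	/* stands in for io_cqring_events(ring) */

/* Completion side: post an event and wake any waiter (cf. io_cqring_ev_posted()). */
static void demo_post_event(void)
{
	demo_events++;
	wake_up(&demo_wait);
}

/* Wait side: block until at least min_events are available, or a signal arrives. */
static int demo_wait_for_events(unsigned int min_events)
{
	int ret;

	/*
	 * wait_event_interruptible() re-checks the condition after every
	 * wakeup, so the ordering barrier can live in the helper that reads
	 * the ring (io_cqring_events() after this series) rather than here.
	 */
	ret = wait_event_interruptible(demo_wait, demo_events >= min_events);
	if (ret == -ERESTARTSYS)	/* interrupted by a signal */
		ret = -EINTR;
	return ret;
}
```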
diff --git a/fs/io_uring.c b/fs/io_uring.c
index fdc18321d70c..310f8d17c53e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -231,7 +231,6 @@ struct io_ring_ctx {
 	struct task_struct *sqo_thread; /* if using sq thread polling */
 	struct mm_struct *sqo_mm;
 	wait_queue_head_t sqo_wait;
-	unsigned sqo_stop;
 
 	struct {
 		/* CQ ring */
@@ -329,9 +328,8 @@ struct io_kiocb {
 #define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
 #define REQ_F_FIXED_FILE	4	/* ctx owns file */
 #define REQ_F_SEQ_PREV		8	/* sequential with previous */
-#define REQ_F_PREPPED		16	/* prep already done */
-#define REQ_F_IO_DRAIN		32	/* drain existing IO first */
-#define REQ_F_IO_DRAINED	64	/* drain done */
+#define REQ_F_IO_DRAIN		16	/* drain existing IO first */
+#define REQ_F_IO_DRAINED	32	/* drain done */
 	u64 user_data;
 	u32 error;	/* iopoll result from callback */
 	u32 sequence;
@@ -490,7 +488,7 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 }
 
 static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
-				 long res, unsigned ev_flags)
+				 long res)
 {
 	struct io_uring_cqe *cqe;
 
@@ -503,7 +501,7 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
 	if (cqe) {
 		WRITE_ONCE(cqe->user_data, ki_user_data);
 		WRITE_ONCE(cqe->res, res);
-		WRITE_ONCE(cqe->flags, ev_flags);
+		WRITE_ONCE(cqe->flags, 0);
 	} else {
 		unsigned overflow = READ_ONCE(ctx->cq_ring->overflow);
 
@@ -522,12 +520,12 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 }
 
 static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
-				long res, unsigned ev_flags)
+				long res)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	io_cqring_fill_event(ctx, user_data, res, ev_flags);
+	io_cqring_fill_event(ctx, user_data, res);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
@@ -629,7 +627,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
 
-		io_cqring_fill_event(ctx, req->user_data, req->error, 0);
+		io_cqring_fill_event(ctx, req->user_data, req->error);
 		(*nr_events)++;
 
 		if (refcount_dec_and_test(&req->refs)) {
@@ -777,7 +775,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 
 	kiocb_end_write(kiocb);
 
-	io_cqring_add_event(req->ctx, req->user_data, res, 0);
+	io_cqring_add_event(req->ctx, req->user_data, res);
 	io_put_req(req);
 }
 
@@ -896,9 +894,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 
 	if (!req->file)
 		return -EBADF;
-	/* For -EAGAIN retry, everything is already prepped */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (force_nonblock && !io_file_supports_async(req->file))
 		force_nonblock = false;
@@ -941,7 +936,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 			return -EINVAL;
 		kiocb->ki_complete = io_complete_rw;
 	}
-	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
 
@@ -1216,7 +1210,7 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
-	io_cqring_add_event(ctx, user_data, err, 0);
+	io_cqring_add_event(ctx, user_data, err);
 	io_put_req(req);
 	return 0;
 }
@@ -1227,16 +1221,12 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (!req->file)
 		return -EBADF;
-	/* Prep already done (EAGAIN retry) */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
 
@@ -1265,7 +1255,7 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				end > 0 ? end : LLONG_MAX,
 				fsync_flags & IORING_FSYNC_DATASYNC);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1277,16 +1267,12 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (!req->file)
 		return -EBADF;
-	/* Prep already done (EAGAIN retry) */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	req->flags |= REQ_F_PREPPED;
 	return ret;
 }
 
@@ -1313,7 +1299,7 @@ static int io_sync_file_range(struct io_kiocb *req,
 
 	ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1371,7 +1357,7 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	}
 	spin_unlock_irq(&ctx->completion_lock);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1380,7 +1366,7 @@ static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			     __poll_t mask)
 {
 	req->poll.done = true;
-	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0);
+	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask));
 	io_commit_cqring(ctx);
 }
 
@@ -1700,7 +1686,7 @@ restart:
 			io_put_req(req);
 
 		if (ret) {
-			io_cqring_add_event(ctx, sqe->user_data, ret, 0);
+			io_cqring_add_event(ctx, sqe->user_data, ret);
 			io_put_req(req);
 		}
 
@@ -2005,7 +1991,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
 			continue;
 		}
 
-		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret, 0);
+		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret);
 	}
 
 	if (statep)
@@ -2028,7 +2014,7 @@ static int io_sq_thread(void *data)
 	set_fs(USER_DS);
 
 	timeout = inflight = 0;
-	while (!kthread_should_stop() && !ctx->sqo_stop) {
+	while (!kthread_should_park()) {
 		bool all_fixed, mm_fault = false;
 		int i;
 
@@ -2090,7 +2076,7 @@ static int io_sq_thread(void *data)
 			smp_mb();
 
 			if (!io_get_sqring(ctx, &sqes[0])) {
-				if (kthread_should_stop()) {
+				if (kthread_should_park()) {
 					finish_wait(&ctx->sqo_wait, &wait);
 					break;
 				}
@@ -2140,8 +2126,7 @@ static int io_sq_thread(void *data)
 		mmput(cur_mm);
 	}
 
-	if (kthread_should_park())
-		kthread_parkme();
+	kthread_parkme();
 
 	return 0;
 }
@@ -2170,7 +2155,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 
 		ret = io_submit_sqe(ctx, &s, statep);
 		if (ret)
-			io_cqring_add_event(ctx, s.sqe->user_data, ret, 0);
+			io_cqring_add_event(ctx, s.sqe->user_data, ret);
 	}
 	io_commit_sqring(ctx);
 
@@ -2182,6 +2167,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 
 static unsigned io_cqring_events(struct io_cq_ring *ring)
 {
+	/* See comment at the top of this file */
+	smp_rmb();
 	return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
 }
 
@@ -2194,11 +2181,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 {
 	struct io_cq_ring *ring = ctx->cq_ring;
 	sigset_t ksigmask, sigsaved;
-	DEFINE_WAIT(wait);
 	int ret;
 
-	/* See comment at the top of this file */
-	smp_rmb();
 	if (io_cqring_events(ring) >= min_events)
 		return 0;
 
@@ -2216,23 +2200,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
-	do {
-		prepare_to_wait(&ctx->wait, &wait, TASK_INTERRUPTIBLE);
-
-		ret = 0;
-		/* See comment at the top of this file */
-		smp_rmb();
-		if (io_cqring_events(ring) >= min_events)
-			break;
-
-		schedule();
-
-		ret = -EINTR;
-		if (signal_pending(current))
-			break;
-	} while (1);
-
-	finish_wait(&ctx->wait, &wait);
+	ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
+	if (ret == -ERESTARTSYS)
+		ret = -EINTR;
 
 	if (sig)
 		restore_user_sigmask(sig, &sigsaved);
@@ -2273,8 +2243,11 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 {
 	if (ctx->sqo_thread) {
-		ctx->sqo_stop = 1;
-		mb();
+		/*
+		 * The park is a bit of a work-around, without it we get
+		 * warning spews on shutdown with SQPOLL set and affinity
+		 * set to a single CPU.
+		 */
 		kthread_park(ctx->sqo_thread);
 		kthread_stop(ctx->sqo_thread);
 		ctx->sqo_thread = NULL;
@@ -2467,10 +2440,11 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 		ctx->sq_thread_idle = HZ;
 
 	if (p->flags & IORING_SETUP_SQ_AFF) {
-		int cpu = array_index_nospec(p->sq_thread_cpu,
-						nr_cpu_ids);
+		int cpu = p->sq_thread_cpu;
 
 		ret = -EINVAL;
+		if (cpu >= nr_cpu_ids)
+			goto err;
 		if (!cpu_online(cpu))
 			goto err;
 
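The SQ poll wait and shutdown fixes above rely on the kthread park handshake: io_sq_thread() now loops on kthread_should_park() and calls kthread_parkme() unconditionally before returning, so the kthread_park() in io_sq_thread_stop() cannot wait forever. A minimal, hypothetical module sketching that handshake (the parkdemo_* names are illustrative and not part of the patch):

```c
// SPDX-License-Identifier: GPL-2.0
/*
 * Hypothetical sketch of the kthread park/stop handshake used by
 * io_sq_thread()/io_sq_thread_stop() after this merge; not io_uring code.
 */
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/delay.h>

static struct task_struct *parkdemo_task;

static int parkdemo_fn(void *data)
{
	/* Mirror io_sq_thread(): loop until asked to park, not to stop. */
	while (!kthread_should_park()) {
		/* ... poll for work, sleep when idle ... */
		msleep(100);
	}
	/*
	 * Park unconditionally; kthread_park() in the stopping task blocks
	 * until this call, so skipping it is what caused the infinite wait.
	 */
	kthread_parkme();
	return 0;
}

static int __init parkdemo_init(void)
{
	parkdemo_task = kthread_create(parkdemo_fn, NULL, "parkdemo");
	if (IS_ERR(parkdemo_task))
		return PTR_ERR(parkdemo_task);
	wake_up_process(parkdemo_task);
	return 0;
}

static void __exit parkdemo_exit(void)
{
	/* Mirror io_sq_thread_stop(): park first, then stop. */
	kthread_park(parkdemo_task);
	kthread_stop(parkdemo_task);
}

module_init(parkdemo_init);
module_exit(parkdemo_exit);
MODULE_LICENSE("GPL");
```

kthread_stop() unparks the thread and waits for it to exit, which is why io_sq_thread_stop() can park first and then stop without racing the worker.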
