author	Linus Torvalds <torvalds@linux-foundation.org>	2019-05-16 22:10:37 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-05-16 22:10:37 -0400
commit	a6a4b66bd8f41922c543f7a820c66ed59c25995e (patch)
tree	1ab2c591cb14eac5b28e5de19d9818e566e79c44
parent	1718de78e6235c04ecb7f87a6875fdf90aafe382 (diff)
parent	fdb288a679cdf6a71f3c1ae6f348ba4dae742681 (diff)
Merge tag 'for-linus-20190516' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:
 "A small set of fixes for io_uring. This contains:

   - smp_rmb() cleanup for io_cqring_events() (Jackie)

   - io_cqring_wait() simplification (Jackie)

   - removal of dead 'ev_flags' passing (me)

   - SQ poll CPU affinity verification fix (me)

   - SQ poll wait fix (Roman)

   - SQE command prep cleanup and fix (Stefan)"

* tag 'for-linus-20190516' of git://git.kernel.dk/linux-block:
  io_uring: use wait_event_interruptible for cq_wait conditional wait
  io_uring: adjust smp_rmb inside io_cqring_events
  io_uring: fix infinite wait in kthread_park() on io_finish_async()
  io_uring: remove 'ev_flags' argument
  io_uring: fix failure to verify SQ_AFF cpu
  io_uring: fix race condition reading SQE data
-rw-r--r--	fs/io_uring.c	88
1 file changed, 31 insertions(+), 57 deletions(-)
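Several of these fixes land in the completion-wait path that userspace reaches through io_uring_enter(2) with IORING_ENTER_GETEVENTS. For orientation, here is a minimal sketch of that call from userspace, assuming 5.1-era uapi headers that define __NR_io_uring_enter; the helper name is hypothetical and error handling is elided:

#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical helper: block until at least min_complete completions
 * are available. On the kernel side this path is io_cqring_wait(),
 * which several commits in this pull touch. */
static long wait_cqes(int ring_fd, unsigned min_complete)
{
	return syscall(__NR_io_uring_enter, ring_fd,
		       0 /* to_submit: only wait, submit nothing */,
		       min_complete, IORING_ENTER_GETEVENTS,
		       NULL /* sigmask */, 0 /* sigmask size */);
}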
diff --git a/fs/io_uring.c b/fs/io_uring.c
index fdc18321d70c..310f8d17c53e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -231,7 +231,6 @@ struct io_ring_ctx {
 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
-	unsigned		sqo_stop;
 
 	struct {
 		/* CQ ring */
@@ -329,9 +328,8 @@ struct io_kiocb {
 #define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
 #define REQ_F_FIXED_FILE	4	/* ctx owns file */
 #define REQ_F_SEQ_PREV		8	/* sequential with previous */
-#define REQ_F_PREPPED		16	/* prep already done */
-#define REQ_F_IO_DRAIN		32	/* drain existing IO first */
-#define REQ_F_IO_DRAINED	64	/* drain done */
+#define REQ_F_IO_DRAIN		16	/* drain existing IO first */
+#define REQ_F_IO_DRAINED	32	/* drain done */
 	u64			user_data;
 	u32			error;	/* iopoll result from callback */
 	u32			sequence;
@@ -490,7 +488,7 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 }
 
 static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
-				 long res, unsigned ev_flags)
+				 long res)
 {
 	struct io_uring_cqe *cqe;
 
@@ -503,7 +501,7 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
 	if (cqe) {
 		WRITE_ONCE(cqe->user_data, ki_user_data);
 		WRITE_ONCE(cqe->res, res);
-		WRITE_ONCE(cqe->flags, ev_flags);
+		WRITE_ONCE(cqe->flags, 0);
 	} else {
 		unsigned overflow = READ_ONCE(ctx->cq_ring->overflow);
 
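Note that the flags field being zeroed here is still part of the user-visible CQE ABI; only the dead in-kernel plumbing of a value for it is removed. For reference, the 5.1-era layout from include/uapi/linux/io_uring.h:

struct io_uring_cqe {
	__u64	user_data;	/* sqe->user_data, echoed back to userspace */
	__s32	res;		/* result code for this event */
	__u32	flags;		/* always written as 0 after this change */
};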
@@ -522,12 +520,12 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 }
 
 static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
-				long res, unsigned ev_flags)
+				long res)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	io_cqring_fill_event(ctx, user_data, res, ev_flags);
+	io_cqring_fill_event(ctx, user_data, res);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
@@ -629,7 +627,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
 
-		io_cqring_fill_event(ctx, req->user_data, req->error, 0);
+		io_cqring_fill_event(ctx, req->user_data, req->error);
 		(*nr_events)++;
 
 		if (refcount_dec_and_test(&req->refs)) {
@@ -777,7 +775,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 
 	kiocb_end_write(kiocb);
 
-	io_cqring_add_event(req->ctx, req->user_data, res, 0);
+	io_cqring_add_event(req->ctx, req->user_data, res);
 	io_put_req(req);
 }
 
@@ -896,9 +894,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 
 	if (!req->file)
 		return -EBADF;
-	/* For -EAGAIN retry, everything is already prepped */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (force_nonblock && !io_file_supports_async(req->file))
 		force_nonblock = false;
@@ -941,7 +936,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 			return -EINVAL;
 		kiocb->ki_complete = io_complete_rw;
 	}
-	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
 
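Dropping REQ_F_PREPPED here is part of Stefan's SQE-data race fix: with the flag, a request retried after -EAGAIN could skip prep yet still dereference SQE fields later, and the SQE lives in the ring shared with userspace, which may already have recycled the slot. A compressed sketch of the hazard (variable names hypothetical, not the actual kernel code):

/* The SQE sits in memory shared with userspace; once the kernel
 * advances the SQ head, userspace is free to reuse the slot. */
u64 addr  = READ_ONCE(sqe->addr);	/* read at submit time: fine */
/* ... request punted with -EAGAIN, retried from async context ... */
u64 addr2 = READ_ONCE(sqe->addr);	/* may belong to a NEWER request! */

Hence all SQE data a request needs must be captured exactly once, up front, before any asynchronous retry can happen.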
@@ -1216,7 +1210,7 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
-	io_cqring_add_event(ctx, user_data, err, 0);
+	io_cqring_add_event(ctx, user_data, err);
 	io_put_req(req);
 	return 0;
 }
@@ -1227,16 +1221,12 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (!req->file)
 		return -EBADF;
-	/* Prep already done (EAGAIN retry) */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
 
@@ -1265,7 +1255,7 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				end > 0 ? end : LLONG_MAX,
 				fsync_flags & IORING_FSYNC_DATASYNC);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1277,16 +1267,12 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (!req->file)
 		return -EBADF;
-	/* Prep already done (EAGAIN retry) */
-	if (req->flags & REQ_F_PREPPED)
-		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	req->flags |= REQ_F_PREPPED;
 	return ret;
 }
 
@@ -1313,7 +1299,7 @@ static int io_sync_file_range(struct io_kiocb *req,
 
 	ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1371,7 +1357,7 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	}
 	spin_unlock_irq(&ctx->completion_lock);
 
-	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
@@ -1380,7 +1366,7 @@ static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			     __poll_t mask)
 {
 	req->poll.done = true;
-	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0);
+	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask));
 	io_commit_cqring(ctx);
 }
 
@@ -1700,7 +1686,7 @@ restart:
 		io_put_req(req);
 
 		if (ret) {
-			io_cqring_add_event(ctx, sqe->user_data, ret, 0);
+			io_cqring_add_event(ctx, sqe->user_data, ret);
 			io_put_req(req);
 		}
 
@@ -2005,7 +1991,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
 			continue;
 		}
 
-		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret, 0);
+		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret);
 	}
 
 	if (statep)
@@ -2028,7 +2014,7 @@ static int io_sq_thread(void *data)
 	set_fs(USER_DS);
 
 	timeout = inflight = 0;
-	while (!kthread_should_stop() && !ctx->sqo_stop) {
+	while (!kthread_should_park()) {
 		bool all_fixed, mm_fault = false;
 		int i;
 
@@ -2090,7 +2076,7 @@ static int io_sq_thread(void *data)
 			smp_mb();
 
 			if (!io_get_sqring(ctx, &sqes[0])) {
-				if (kthread_should_stop()) {
+				if (kthread_should_park()) {
 					finish_wait(&ctx->sqo_wait, &wait);
 					break;
 				}
@@ -2140,8 +2126,7 @@ static int io_sq_thread(void *data)
 		mmput(cur_mm);
 	}
 
-	if (kthread_should_park())
-		kthread_parkme();
+	kthread_parkme();
 
 	return 0;
 }
@@ -2170,7 +2155,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 
 		ret = io_submit_sqe(ctx, &s, statep);
 		if (ret)
-			io_cqring_add_event(ctx, s.sqe->user_data, ret, 0);
+			io_cqring_add_event(ctx, s.sqe->user_data, ret);
 	}
 	io_commit_sqring(ctx);
 
@@ -2182,6 +2167,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 
 static unsigned io_cqring_events(struct io_cq_ring *ring)
 {
+	/* See comment at the top of this file */
+	smp_rmb();
 	return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
 }
 
@@ -2194,11 +2181,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 {
 	struct io_cq_ring *ring = ctx->cq_ring;
 	sigset_t ksigmask, sigsaved;
-	DEFINE_WAIT(wait);
 	int ret;
 
-	/* See comment at the top of this file */
-	smp_rmb();
 	if (io_cqring_events(ring) >= min_events)
 		return 0;
 
@@ -2216,23 +2200,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
-	do {
-		prepare_to_wait(&ctx->wait, &wait, TASK_INTERRUPTIBLE);
-
-		ret = 0;
-		/* See comment at the top of this file */
-		smp_rmb();
-		if (io_cqring_events(ring) >= min_events)
-			break;
-
-		schedule();
-
-		ret = -EINTR;
-		if (signal_pending(current))
-			break;
-	} while (1);
-
-	finish_wait(&ctx->wait, &wait);
+	ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
+	if (ret == -ERESTARTSYS)
+		ret = -EINTR;
 
 	if (sig)
 		restore_user_sigmask(sig, &sigsaved);
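wait_event_interruptible() encapsulates exactly the prepare/check/schedule dance deleted above, returning -ERESTARTSYS on a signal, which the caller maps to -EINTR. Roughly — a sketch of the contract, not the literal macro expansion in include/linux/wait.h:

/* ret = wait_event_interruptible(wq, cond); behaves about like: */
for (;;) {
	prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE);
	if (cond) {
		ret = 0;		/* condition met */
		break;
	}
	if (signal_pending(current)) {
		ret = -ERESTARTSYS;	/* mapped to -EINTR above */
		break;
	}
	schedule();
}
finish_wait(&wq, &wait);

Because the condition is (re)checked only after prepare_to_wait() has queued the task, a wakeup from io_cqring_ev_posted() landing between the check and the sleep cannot be lost.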
@@ -2273,8 +2243,11 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 {
 	if (ctx->sqo_thread) {
-		ctx->sqo_stop = 1;
-		mb();
+		/*
+		 * The park is a bit of a work-around, without it we get
+		 * warning spews on shutdown with SQPOLL set and affinity
+		 * set to a single CPU.
+		 */
 		kthread_park(ctx->sqo_thread);
 		kthread_stop(ctx->sqo_thread);
 		ctx->sqo_thread = NULL;
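With the private sqo_stop flag gone, shutdown rides entirely on the kthread park/stop protocol. The handshake, sketched in comment form (an illustration, not code from the tree):

/*
 * io_sq_thread_stop()                 io_sq_thread()
 * -------------------                 --------------
 * kthread_park(t)  -- sets flag -->   while (!kthread_should_park())
 *   ... blocks until the thread           ... submit SQEs ...
 *       actually parks ...            kthread_parkme();  <-- unconditional
 * kthread_stop(t)  ---------------->  thread unparks, sees stop, exits
 */

The old code could leave the loop via ctx->sqo_stop without ever calling kthread_parkme(); kthread_park() would then wait forever for a thread that had already exited, which is the infinite wait Roman's patch fixes.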
@@ -2467,10 +2440,11 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 		ctx->sq_thread_idle = HZ;
 
 	if (p->flags & IORING_SETUP_SQ_AFF) {
-		int cpu = array_index_nospec(p->sq_thread_cpu,
-						nr_cpu_ids);
+		int cpu = p->sq_thread_cpu;
 
 		ret = -EINVAL;
+		if (cpu >= nr_cpu_ids)
+			goto err;
 		if (!cpu_online(cpu))
 			goto err;
 
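array_index_nospec() is a Spectre mitigation, not a validity check: for an in-range index it returns the index, and for an out-of-range one it returns 0. The old code therefore silently remapped a bogus sq_thread_cpu to CPU 0 and checked cpu_online(0) instead of failing. A sketch of the distinction (a standalone illustration, not the tree's code):

int cpu = p->sq_thread_cpu;

/* WRONG as validation: an out-of-range cpu becomes 0, which is
 * typically online, so the bad value is accepted. */
cpu = array_index_nospec(cpu, nr_cpu_ids);

/* RIGHT: reject the value first; the clamp is only meaningful as a
 * speculation barrier *after* a real bounds check has been done. */
if (cpu >= nr_cpu_ids)
	return -EINVAL;

The merged fix takes the simple route: an explicit range check before cpu_online(), returning -EINVAL for out-of-range values.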