 fs/aio.c                      |   9
 fs/io_uring.c                 | 338
 fs/splice.c                   |   8
 include/linux/socket.h        |   7
 include/linux/uio.h           |   4
 include/uapi/linux/io_uring.h |   4
 lib/iov_iter.c                |  15
 net/compat.c                  |   3
 net/socket.c                  |  18
 9 files changed, 330 insertions(+), 76 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1479,8 +1479,9 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
 	return 0;
 }
 
-static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
-		bool vectored, bool compat, struct iov_iter *iter)
+static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
+		struct iovec **iovec, bool vectored, bool compat,
+		struct iov_iter *iter)
 {
 	void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
 	size_t len = iocb->aio_nbytes;
@@ -1537,7 +1538,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb,
 		return -EINVAL;
 
 	ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
-	if (ret)
+	if (ret < 0)
 		return ret;
 	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
 	if (!ret)
@@ -1565,7 +1566,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
 		return -EINVAL;
 
 	ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
-	if (ret)
+	if (ret < 0)
 		return ret;
 	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
 	if (!ret) {
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4ed4b110a154..3fd884b4e0be 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -231,6 +231,7 @@ struct io_ring_ctx {
 	struct task_struct *sqo_thread;	/* if using sq thread polling */
 	struct mm_struct *sqo_mm;
 	wait_queue_head_t sqo_wait;
+	struct completion sqo_thread_started;
 
 	struct {
 		/* CQ ring */
@@ -322,6 +323,7 @@ struct io_kiocb {
 
 	struct io_ring_ctx *ctx;
 	struct list_head list;
+	struct list_head link_list;
 	unsigned int flags;
 	refcount_t refs;
 #define REQ_F_NOWAIT		1	/* must not punt to workers */
@@ -330,8 +332,10 @@ struct io_kiocb {
 #define REQ_F_SEQ_PREV		8	/* sequential with previous */
 #define REQ_F_IO_DRAIN		16	/* drain existing IO first */
 #define REQ_F_IO_DRAINED	32	/* drain done */
+#define REQ_F_LINK		64	/* linked sqes */
+#define REQ_F_FAIL_LINK		128	/* fail rest of links */
 	u64 user_data;
-	u32 error;	/* iopoll result from callback */
+	u32 result;
 	u32 sequence;
 
 	struct work_struct work;
@@ -403,6 +407,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	ctx->flags = p->flags;
 	init_waitqueue_head(&ctx->cq_wait);
 	init_completion(&ctx->ctx_done);
+	init_completion(&ctx->sqo_thread_started);
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->wait);
 	for (i = 0; i < ARRAY_SIZE(ctx->pending_async); i++) {
@@ -584,6 +589,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 	req->flags = 0;
 	/* one is dropped after submission, the other at completion */
 	refcount_set(&req->refs, 2);
+	req->result = 0;
 	return req;
 out:
 	io_ring_drop_ctx_refs(ctx, 1);
@@ -599,7 +605,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
 	}
 }
 
-static void io_free_req(struct io_kiocb *req)
+static void __io_free_req(struct io_kiocb *req)
 {
 	if (req->file && !(req->flags & REQ_F_FIXED_FILE))
 		fput(req->file);
@@ -607,6 +613,63 @@ static void io_free_req(struct io_kiocb *req)
 	kmem_cache_free(req_cachep, req);
 }
 
+static void io_req_link_next(struct io_kiocb *req)
+{
+	struct io_kiocb *nxt;
+
+	/*
+	 * The list should never be empty when we are called here. But could
+	 * potentially happen if the chain is messed up, check to be on the
+	 * safe side.
+	 */
+	nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list);
+	if (nxt) {
+		list_del(&nxt->list);
+		if (!list_empty(&req->link_list)) {
+			INIT_LIST_HEAD(&nxt->link_list);
+			list_splice(&req->link_list, &nxt->link_list);
+			nxt->flags |= REQ_F_LINK;
+		}
+
+		INIT_WORK(&nxt->work, io_sq_wq_submit_work);
+		queue_work(req->ctx->sqo_wq, &nxt->work);
+	}
+}
+
+/*
+ * Called if REQ_F_LINK is set, and we fail the head request
+ */
+static void io_fail_links(struct io_kiocb *req)
+{
+	struct io_kiocb *link;
+
+	while (!list_empty(&req->link_list)) {
+		link = list_first_entry(&req->link_list, struct io_kiocb, list);
+		list_del(&link->list);
+
+		io_cqring_add_event(req->ctx, link->user_data, -ECANCELED);
+		__io_free_req(link);
+	}
+}
+
+static void io_free_req(struct io_kiocb *req)
+{
+	/*
+	 * If LINK is set, we have dependent requests in this chain. If we
+	 * didn't fail this request, queue the first one up, moving any other
+	 * dependencies to the next request. In case of failure, fail the rest
+	 * of the chain.
+	 */
+	if (req->flags & REQ_F_LINK) {
+		if (req->flags & REQ_F_FAIL_LINK)
+			io_fail_links(req);
+		else
+			io_req_link_next(req);
+	}
+
+	__io_free_req(req);
+}
+
 static void io_put_req(struct io_kiocb *req)
 {
 	if (refcount_dec_and_test(&req->refs))
@@ -628,16 +691,17 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
 
-		io_cqring_fill_event(ctx, req->user_data, req->error);
+		io_cqring_fill_event(ctx, req->user_data, req->result);
 		(*nr_events)++;
 
 		if (refcount_dec_and_test(&req->refs)) {
 			/* If we're not using fixed files, we have to pair the
 			 * completion part with the file put. Use regular
 			 * completions for those, only batch free for fixed
-			 * file.
+			 * file and non-linked commands.
 			 */
-			if (req->flags & REQ_F_FIXED_FILE) {
+			if ((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
+			    REQ_F_FIXED_FILE) {
 				reqs[to_free++] = req;
 				if (to_free == ARRAY_SIZE(reqs))
 					io_free_req_many(ctx, reqs, &to_free);
@@ -776,6 +840,8 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 
 	kiocb_end_write(kiocb);
 
+	if ((req->flags & REQ_F_LINK) && res != req->result)
+		req->flags |= REQ_F_FAIL_LINK;
 	io_cqring_add_event(req->ctx, req->user_data, res);
 	io_put_req(req);
 }
@@ -786,7 +852,9 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 
 	kiocb_end_write(kiocb);
 
-	req->error = res;
+	if ((req->flags & REQ_F_LINK) && res != req->result)
+		req->flags |= REQ_F_FAIL_LINK;
+	req->result = res;
 	if (res != -EAGAIN)
 		req->flags |= REQ_F_IOPOLL_COMPLETED;
 }
@@ -929,7 +997,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 		    !kiocb->ki_filp->f_op->iopoll)
 			return -EOPNOTSUPP;
 
-		req->error = 0;
 		kiocb->ki_flags |= IOCB_HIPRI;
 		kiocb->ki_complete = io_complete_rw_iopoll;
 	} else {
@@ -1001,9 +1068,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
 	return 0;
 }
 
-static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
-			   const struct sqe_submit *s, struct iovec **iovec,
-			   struct iov_iter *iter)
+static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
+			       const struct sqe_submit *s, struct iovec **iovec,
+			       struct iov_iter *iter)
 {
 	const struct io_uring_sqe *sqe = s->sqe;
 	void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -1021,7 +1088,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
 	opcode = READ_ONCE(sqe->opcode);
 	if (opcode == IORING_OP_READ_FIXED ||
 	    opcode == IORING_OP_WRITE_FIXED) {
-		int ret = io_import_fixed(ctx, rw, sqe, iter);
+		ssize_t ret = io_import_fixed(ctx, rw, sqe, iter);
 		*iovec = NULL;
 		return ret;
 	}
@@ -1087,7 +1154,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
 	struct iov_iter iter;
 	struct file *file;
 	size_t iov_count;
-	int ret;
+	ssize_t read_size, ret;
 
 	ret = io_prep_rw(req, s, force_nonblock);
 	if (ret)
@@ -1100,16 +1167,30 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
 		return -EINVAL;
 
 	ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
-	if (ret)
+	if (ret < 0)
 		return ret;
 
+	read_size = ret;
+	if (req->flags & REQ_F_LINK)
+		req->result = read_size;
+
 	iov_count = iov_iter_count(&iter);
 	ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
 	if (!ret) {
 		ssize_t ret2;
 
-		/* Catch -EAGAIN return for forced non-blocking submission */
 		ret2 = call_read_iter(file, kiocb, &iter);
+		/*
+		 * In case of a short read, punt to async. This can happen
+		 * if we have data partially cached. Alternatively we can
+		 * return the short read, in which case the application will
+		 * need to issue another SQE and wait for it. That SQE will
+		 * need async punt anyway, so it's more efficient to do it
+		 * here.
+		 */
+		if (force_nonblock && ret2 > 0 && ret2 < read_size)
+			ret2 = -EAGAIN;
+		/* Catch -EAGAIN return for forced non-blocking submission */
 		if (!force_nonblock || ret2 != -EAGAIN) {
 			io_rw_done(kiocb, ret2);
 		} else {
@@ -1134,7 +1215,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
 	struct iov_iter iter;
 	struct file *file;
 	size_t iov_count;
-	int ret;
+	ssize_t ret;
 
 	ret = io_prep_rw(req, s, force_nonblock);
 	if (ret)
@@ -1147,9 +1228,12 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
 		return -EINVAL;
 
 	ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
-	if (ret)
+	if (ret < 0)
 		return ret;
 
+	if (req->flags & REQ_F_LINK)
+		req->result = ret;
+
 	iov_count = iov_iter_count(&iter);
 
 	ret = -EAGAIN;
@@ -1253,6 +1337,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				end > 0 ? end : LLONG_MAX,
 				fsync_flags & IORING_FSYNC_DATASYNC);
 
+	if (ret < 0 && (req->flags & REQ_F_LINK))
+		req->flags |= REQ_F_FAIL_LINK;
 	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
@@ -1297,11 +1383,70 @@ static int io_sync_file_range(struct io_kiocb *req,
 
 	ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
 
+	if (ret < 0 && (req->flags & REQ_F_LINK))
+		req->flags |= REQ_F_FAIL_LINK;
 	io_cqring_add_event(req->ctx, sqe->user_data, ret);
 	io_put_req(req);
 	return 0;
 }
 
+#if defined(CONFIG_NET)
+static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+			   bool force_nonblock,
+			   long (*fn)(struct socket *, struct user_msghdr __user *,
+				      unsigned int))
+{
+	struct socket *sock;
+	int ret;
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+
+	sock = sock_from_file(req->file, &ret);
+	if (sock) {
+		struct user_msghdr __user *msg;
+		unsigned flags;
+
+		flags = READ_ONCE(sqe->msg_flags);
+		if (flags & MSG_DONTWAIT)
+			req->flags |= REQ_F_NOWAIT;
+		else if (force_nonblock)
+			flags |= MSG_DONTWAIT;
+
+		msg = (struct user_msghdr __user *) (unsigned long)
+			READ_ONCE(sqe->addr);
+
+		ret = fn(sock, msg, flags);
+		if (force_nonblock && ret == -EAGAIN)
+			return ret;
+	}
+
+	io_cqring_add_event(req->ctx, sqe->user_data, ret);
+	io_put_req(req);
+	return 0;
+}
+#endif
+
+static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+		      bool force_nonblock)
+{
+#if defined(CONFIG_NET)
+	return io_send_recvmsg(req, sqe, force_nonblock, __sys_sendmsg_sock);
+#else
+	return -EOPNOTSUPP;
+#endif
+}
+
+static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+		      bool force_nonblock)
+{
+#if defined(CONFIG_NET)
+	return io_send_recvmsg(req, sqe, force_nonblock, __sys_recvmsg_sock);
+#else
+	return -EOPNOTSUPP;
+#endif
+}
+
 static void io_poll_remove_one(struct io_kiocb *req)
 {
 	struct io_poll_iocb *poll = &req->poll;
@@ -1549,9 +1694,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 {
 	int ret, opcode;
 
+	req->user_data = READ_ONCE(s->sqe->user_data);
+
 	if (unlikely(s->index >= ctx->sq_entries))
 		return -EINVAL;
-	req->user_data = READ_ONCE(s->sqe->user_data);
 
 	opcode = READ_ONCE(s->sqe->opcode);
 	switch (opcode) {
@@ -1586,6 +1732,12 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	case IORING_OP_SYNC_FILE_RANGE:
 		ret = io_sync_file_range(req, s->sqe, force_nonblock);
 		break;
+	case IORING_OP_SENDMSG:
+		ret = io_sendmsg(req, s->sqe, force_nonblock);
+		break;
+	case IORING_OP_RECVMSG:
+		ret = io_recvmsg(req, s->sqe, force_nonblock);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -1595,7 +1747,7 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		return ret;
 
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
-		if (req->error == -EAGAIN)
+		if (req->result == -EAGAIN)
 			return -EAGAIN;
 
 		/* workqueue context doesn't hold uring_lock, grab it now */
@@ -1819,31 +1971,11 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
 	return 0;
 }
 
-static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
-			 struct io_submit_state *state)
+static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+			struct sqe_submit *s)
 {
-	struct io_kiocb *req;
 	int ret;
 
-	/* enforce forwards compatibility on users */
-	if (unlikely(s->sqe->flags & ~(IOSQE_FIXED_FILE | IOSQE_IO_DRAIN)))
-		return -EINVAL;
-
-	req = io_get_req(ctx, state);
-	if (unlikely(!req))
-		return -EAGAIN;
-
-	ret = io_req_set_file(ctx, s, state, req);
-	if (unlikely(ret))
-		goto out;
-
-	ret = io_req_defer(ctx, req, s->sqe);
-	if (ret) {
-		if (ret == -EIOCBQUEUED)
-			ret = 0;
-		return ret;
-	}
-
 	ret = __io_submit_sqe(ctx, req, s, true);
 	if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
 		struct io_uring_sqe *sqe_copy;
@@ -1866,24 +1998,93 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 
 			/*
 			 * Queued up for async execution, worker will release
-			 * submit reference when the iocb is actually
-			 * submitted.
+			 * submit reference when the iocb is actually submitted.
 			 */
 			return 0;
 		}
 	}
 
-out:
 	/* drop submission reference */
 	io_put_req(req);
 
 	/* and drop final reference, if we failed */
-	if (ret)
+	if (ret) {
+		io_cqring_add_event(ctx, req->user_data, ret);
+		if (req->flags & REQ_F_LINK)
+			req->flags |= REQ_F_FAIL_LINK;
 		io_put_req(req);
+	}
 
 	return ret;
 }
 
+#define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
+
+static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
+			  struct io_submit_state *state, struct io_kiocb **link)
+{
+	struct io_uring_sqe *sqe_copy;
+	struct io_kiocb *req;
+	int ret;
+
+	/* enforce forwards compatibility on users */
+	if (unlikely(s->sqe->flags & ~SQE_VALID_FLAGS)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	req = io_get_req(ctx, state);
+	if (unlikely(!req)) {
+		ret = -EAGAIN;
+		goto err;
+	}
+
+	ret = io_req_set_file(ctx, s, state, req);
+	if (unlikely(ret)) {
+err_req:
+		io_free_req(req);
+err:
+		io_cqring_add_event(ctx, s->sqe->user_data, ret);
+		return;
+	}
+
+	ret = io_req_defer(ctx, req, s->sqe);
+	if (ret) {
+		if (ret != -EIOCBQUEUED)
+			goto err_req;
+		return;
+	}
+
+	/*
+	 * If we already have a head request, queue this one for async
+	 * submittal once the head completes. If we don't have a head but
+	 * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be
+	 * submitted sync once the chain is complete. If none of those
+	 * conditions are true (normal request), then just queue it.
+	 */
+	if (*link) {
+		struct io_kiocb *prev = *link;
+
+		sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
+		if (!sqe_copy) {
+			ret = -EAGAIN;
+			goto err_req;
+		}
+
+		s->sqe = sqe_copy;
+		memcpy(&req->submit, s, sizeof(*s));
+		list_add_tail(&req->list, &prev->link_list);
+	} else if (s->sqe->flags & IOSQE_IO_LINK) {
+		req->flags |= REQ_F_LINK;
+
+		memcpy(&req->submit, s, sizeof(*s));
+		INIT_LIST_HEAD(&req->link_list);
+		*link = req;
+	} else {
+		io_queue_sqe(ctx, req, s);
+	}
+}
+
 /*
  * Batched submission is done, ensure local IO is flushed out.
  */
@@ -1966,7 +2167,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
 			  unsigned int nr, bool has_user, bool mm_fault)
 {
 	struct io_submit_state state, *statep = NULL;
-	int ret, i, submitted = 0;
+	struct io_kiocb *link = NULL;
+	bool prev_was_link = false;
+	int i, submitted = 0;
 
 	if (nr > IO_PLUG_THRESHOLD) {
 		io_submit_state_start(&state, ctx, nr);
@@ -1974,22 +2177,30 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
 	}
 
 	for (i = 0; i < nr; i++) {
+		/*
+		 * If previous wasn't linked and we have a linked command,
+		 * that's the end of the chain. Submit the previous link.
+		 */
+		if (!prev_was_link && link) {
+			io_queue_sqe(ctx, link, &link->submit);
+			link = NULL;
+		}
+		prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
+
 		if (unlikely(mm_fault)) {
-			ret = -EFAULT;
+			io_cqring_add_event(ctx, sqes[i].sqe->user_data,
+						-EFAULT);
 		} else {
 			sqes[i].has_user = has_user;
 			sqes[i].needs_lock = true;
 			sqes[i].needs_fixed_file = true;
-			ret = io_submit_sqe(ctx, &sqes[i], statep);
-		}
-		if (!ret) {
+			io_submit_sqe(ctx, &sqes[i], statep, &link);
 			submitted++;
-			continue;
 		}
-
-		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret);
 	}
 
+	if (link)
+		io_queue_sqe(ctx, link, &link->submit);
 	if (statep)
 		io_submit_state_end(&state);
 
@@ -2006,6 +2217,8 @@ static int io_sq_thread(void *data)
 	unsigned inflight;
 	unsigned long timeout;
 
+	complete(&ctx->sqo_thread_started);
+
 	old_fs = get_fs();
 	set_fs(USER_DS);
 
@@ -2130,6 +2343,8 @@ static int io_sq_thread(void *data)
 static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 {
 	struct io_submit_state state, *statep = NULL;
+	struct io_kiocb *link = NULL;
+	bool prev_was_link = false;
 	int i, submit = 0;
 
 	if (to_submit > IO_PLUG_THRESHOLD) {
@@ -2139,22 +2354,30 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 
 	for (i = 0; i < to_submit; i++) {
 		struct sqe_submit s;
-		int ret;
 
 		if (!io_get_sqring(ctx, &s))
 			break;
 
+		/*
+		 * If previous wasn't linked and we have a linked command,
+		 * that's the end of the chain. Submit the previous link.
+		 */
+		if (!prev_was_link && link) {
+			io_queue_sqe(ctx, link, &link->submit);
+			link = NULL;
+		}
+		prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
+
 		s.has_user = true;
 		s.needs_lock = false;
 		s.needs_fixed_file = false;
 		submit++;
-
-		ret = io_submit_sqe(ctx, &s, statep);
-		if (ret)
-			io_cqring_add_event(ctx, s.sqe->user_data, ret);
+		io_submit_sqe(ctx, &s, statep, &link);
 	}
 	io_commit_sqring(ctx);
 
+	if (link)
+		io_queue_sqe(ctx, link, &link->submit);
 	if (statep)
 		io_submit_state_end(statep);
 
@@ -2240,6 +2463,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 {
 	if (ctx->sqo_thread) {
+		wait_for_completion(&ctx->sqo_thread_started);
 		/*
 		 * The park is a bit of a work-around, without it we get
 		 * warning spews on shutdown with SQPOLL set and affinity
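
The linking behaviour added above is driven entirely by IOSQE_IO_LINK on the submitted SQEs: io_submit_sqe() parks follow-up requests on the head's link_list, io_req_link_next() queues the next one only once the head completes, and io_fail_links() completes the remainder with -ECANCELED if the head fails or comes up short. A minimal userspace sketch of a chained write-plus-fsync is shown below; ring setup, SQE acquisition (the get_sqe() helper) and io_uring_enter() are hypothetical and omitted, only the SQE fields come from the uapi header.

/* Hypothetical sketch: queue a write followed by a linked fsync.
 * get_sqe() is an assumed helper returning the next free SQE slot in
 * an already-mapped SQ ring; includes and ring setup are omitted. */
static void queue_write_then_fsync(int fd, const struct iovec *iov,
				   unsigned nr_iov, off_t offset)
{
	struct io_uring_sqe *sqe;

	sqe = get_sqe();			/* hypothetical helper */
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_WRITEV;
	sqe->fd = fd;
	sqe->addr = (unsigned long) iov;
	sqe->len = nr_iov;
	sqe->off = offset;
	sqe->flags = IOSQE_IO_LINK;		/* next SQE waits for this one */
	sqe->user_data = 1;

	sqe = get_sqe();
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_FSYNC;		/* only runs after the write */
	sqe->fd = fd;
	sqe->user_data = 2;
	/* if the write fails or is short, this completes with -ECANCELED */
}
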
diff --git a/fs/splice.c b/fs/splice.c
index 14cb602d9a2f..98412721f056 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1356,7 +1356,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 	struct iovec iovstack[UIO_FASTIOV];
 	struct iovec *iov = iovstack;
 	struct iov_iter iter;
-	long error;
+	ssize_t error;
 	struct fd f;
 	int type;
 
@@ -1367,7 +1367,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 
 	error = import_iovec(type, uiov, nr_segs,
 			     ARRAY_SIZE(iovstack), &iov, &iter);
-	if (!error) {
+	if (error >= 0) {
 		error = do_vmsplice(f.file, &iter, flags);
 		kfree(iov);
 	}
@@ -1382,7 +1382,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io
 	struct iovec iovstack[UIO_FASTIOV];
 	struct iovec *iov = iovstack;
 	struct iov_iter iter;
-	long error;
+	ssize_t error;
 	struct fd f;
 	int type;
 
@@ -1393,7 +1393,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io
 
 	error = compat_import_iovec(type, iov32, nr_segs,
 				    ARRAY_SIZE(iovstack), &iov, &iter);
-	if (!error) {
+	if (error >= 0) {
 		error = do_vmsplice(f.file, &iter, flags);
 		kfree(iov);
 	}
diff --git a/include/linux/socket.h b/include/linux/socket.h
index b57cd8bf96e2..97523818cb14 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -12,6 +12,7 @@
 
 struct pid;
 struct cred;
+struct socket;
 
 #define __sockaddr_check_size(size)	\
 	BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
@@ -374,6 +375,12 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
 extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
 			  unsigned int vlen, unsigned int flags,
 			  bool forbid_cmsg_compat);
+extern long __sys_sendmsg_sock(struct socket *sock,
+			       struct user_msghdr __user *msg,
+			       unsigned int flags);
+extern long __sys_recvmsg_sock(struct socket *sock,
+			       struct user_msghdr __user *msg,
+			       unsigned int flags);
 
 /* helpers which do the actual work for syscalls */
 extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index cea1761c5672..ab5f523bc0df 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -267,13 +267,13 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct
 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
 		struct iov_iter *i);
 
-int import_iovec(int type, const struct iovec __user * uvector,
+ssize_t import_iovec(int type, const struct iovec __user * uvector,
 		 unsigned nr_segs, unsigned fast_segs,
 		 struct iovec **iov, struct iov_iter *i);
 
 #ifdef CONFIG_COMPAT
 struct compat_iovec;
-int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
+ssize_t compat_import_iovec(int type, const struct compat_iovec __user * uvector,
 		unsigned nr_segs, unsigned fast_segs,
 		struct iovec **iov, struct iov_iter *i);
 #endif
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index a0c460025036..1e1652f25cc1 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -27,6 +27,7 @@ struct io_uring_sqe {
 		__u32 fsync_flags;
 		__u16 poll_events;
 		__u32 sync_range_flags;
+		__u32 msg_flags;
 	};
 	__u64 user_data;	/* data to be passed back at completion time */
 	union {
@@ -40,6 +41,7 @@ struct io_uring_sqe {
  */
 #define IOSQE_FIXED_FILE	(1U << 0)	/* use fixed fileset */
 #define IOSQE_IO_DRAIN		(1U << 1)	/* issue after inflight IO */
+#define IOSQE_IO_LINK		(1U << 2)	/* links next sqe */
 
 /*
  * io_uring_setup() flags
@@ -57,6 +59,8 @@ struct io_uring_sqe {
 #define IORING_OP_POLL_ADD	6
 #define IORING_OP_POLL_REMOVE	7
 #define IORING_OP_SYNC_FILE_RANGE	8
+#define IORING_OP_SENDMSG	9
+#define IORING_OP_RECVMSG	10
 
 /*
  * sqe->fsync_flags
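
The new opcodes mirror the sendmsg(2)/recvmsg(2) system calls: sqe->fd names the socket, sqe->addr points at a struct msghdr in userspace, and the new sqe->msg_flags field carries the usual MSG_* flags. A hedged sketch of preparing IORING_OP_SENDMSG follows; get_sqe(), sockfd, iov and nr_iov are assumed to exist, and ring setup is omitted.

/* Hypothetical sketch: issue sendmsg() through io_uring. Only the SQE
 * field usage (addr = msghdr pointer, msg_flags) reflects this patch;
 * everything else is assumed. */
struct msghdr msg = {
	.msg_iov = iov,
	.msg_iovlen = nr_iov,
};
struct io_uring_sqe *sqe = get_sqe();	/* hypothetical helper */

memset(sqe, 0, sizeof(*sqe));
sqe->opcode = IORING_OP_SENDMSG;
sqe->fd = sockfd;
sqe->addr = (unsigned long) &msg;
sqe->msg_flags = MSG_NOSIGNAL;
sqe->user_data = 42;
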
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f99c41d4eb54..f1e0569b4539 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1634,9 +1634,9 @@ EXPORT_SYMBOL(dup_iter);
  * on-stack array was used or not (and regardless of whether this function
  * returns an error or not).
  *
- * Return: 0 on success or negative error code on error.
+ * Return: Negative error code on error, bytes imported on success
  */
-int import_iovec(int type, const struct iovec __user * uvector,
+ssize_t import_iovec(int type, const struct iovec __user * uvector,
 		 unsigned nr_segs, unsigned fast_segs,
 		 struct iovec **iov, struct iov_iter *i)
 {
@@ -1652,16 +1652,17 @@ int import_iovec(int type, const struct iovec __user * uvector,
 	}
 	iov_iter_init(i, type, p, nr_segs, n);
 	*iov = p == *iov ? NULL : p;
-	return 0;
+	return n;
 }
 EXPORT_SYMBOL(import_iovec);
 
 #ifdef CONFIG_COMPAT
 #include <linux/compat.h>
 
-int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
-		unsigned nr_segs, unsigned fast_segs,
-		struct iovec **iov, struct iov_iter *i)
+ssize_t compat_import_iovec(int type,
+		const struct compat_iovec __user * uvector,
+		unsigned nr_segs, unsigned fast_segs,
+		struct iovec **iov, struct iov_iter *i)
 {
 	ssize_t n;
 	struct iovec *p;
@@ -1675,7 +1676,7 @@ int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
 	}
 	iov_iter_init(i, type, p, nr_segs, n);
 	*iov = p == *iov ? NULL : p;
-	return 0;
+	return n;
 }
 #endif
 
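
With this change import_iovec() and compat_import_iovec() return the total number of bytes described by the imported vector rather than 0 on success, so callers must test for a negative return instead of non-zero, as the aio, vmsplice and msghdr callers above were updated to do. A sketch of the adjusted convention for a hypothetical in-kernel caller:

/* Hypothetical caller, illustrating the new import_iovec() convention. */
static ssize_t example_read(struct file *file, const struct iovec __user *uvec,
			    unsigned long nr_segs, loff_t *pos)
{
	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)		/* was "if (ret)": 0 used to mean success */
		return ret;

	/* ret is the total number of bytes described by the vector */
	ret = vfs_iter_read(file, &iter, pos, 0);
	kfree(iov);
	return ret;
}
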
diff --git a/net/compat.c b/net/compat.c
index 3f9ce609397f..0f7ded26059e 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -80,9 +80,10 @@ int get_compat_msghdr(struct msghdr *kmsg,
 
 	kmsg->msg_iocb = NULL;
 
-	return compat_import_iovec(save_addr ? READ : WRITE,
-				   compat_ptr(msg.msg_iov), msg.msg_iovlen,
-				   UIO_FASTIOV, iov, &kmsg->msg_iter);
+	err = compat_import_iovec(save_addr ? READ : WRITE,
+				  compat_ptr(msg.msg_iov), msg.msg_iovlen,
+				  UIO_FASTIOV, iov, &kmsg->msg_iter);
+	return err < 0 ? err : 0;
 }
 
 /* Bleech... */
diff --git a/net/socket.c b/net/socket.c
index 16449d6daeca..293d56836f01 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2222,9 +2222,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
 
 	kmsg->msg_iocb = NULL;
 
-	return import_iovec(save_addr ? READ : WRITE,
-			    msg.msg_iov, msg.msg_iovlen,
-			    UIO_FASTIOV, iov, &kmsg->msg_iter);
+	err = import_iovec(save_addr ? READ : WRITE,
+			   msg.msg_iov, msg.msg_iovlen,
+			   UIO_FASTIOV, iov, &kmsg->msg_iter);
+	return err < 0 ? err : 0;
 }
 
 static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
@@ -2326,6 +2327,13 @@ out_freeiov:
 /*
  *	BSD sendmsg interface
  */
+long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
+			unsigned int flags)
+{
+	struct msghdr msg_sys;
+
+	return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
+}
 
 long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
 		   bool forbid_cmsg_compat)
@@ -2500,6 +2508,14 @@ out_freeiov:
  *	BSD recvmsg interface
  */
 
+long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
+			unsigned int flags)
+{
+	struct msghdr msg_sys;
+
+	return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
+}
+
 long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
 		   bool forbid_cmsg_compat)
 {