diff options
author     Jens Axboe <axboe@kernel.dk>  2019-10-17 11:20:46 -0400
committer  Jens Axboe <axboe@kernel.dk>  2019-10-17 17:49:11 -0400
commit     491381ce07ca57f68c49c79a8a43da5b60749e32 (patch)
tree       fc3ae1c4ef80323a180bd24fd5f68193ca267bc8 /fs
parent     6333ff6e5a43703a8f7ea16b0f2890a38f5467f0 (diff)
io_uring: fix up O_NONBLOCK handling for sockets
We've got two issues with the non-regular file handling for non-blocking
IO:
1) We don't want to re-do a short read in full for a non-regular file,
as we can't just read the data again.
2) For non-regular files that don't support non-blocking IO attempts,
we need to punt to async context even if the file is opened as
non-blocking. Otherwise the caller always gets -EAGAIN.
Add two new request flags to handle these cases. One is just a cache
of the inode S_ISREG() status, the other tells io_uring that we always
need to punt this request to async context, even if REQ_F_NOWAIT is set.
Cc: stable@vger.kernel.org
Reported-by: Hrvoje Zeba <zeba.hrvoje@gmail.com>
Tested-by: Hrvoje Zeba <zeba.hrvoje@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs')
-rw-r--r--  fs/io_uring.c  57
1 file changed, 39 insertions(+), 18 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d2cb277da2f4..b7d4085d6ffd 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -322,6 +322,8 @@ struct io_kiocb {
 #define REQ_F_FAIL_LINK		256	/* fail rest of links */
 #define REQ_F_SHADOW_DRAIN	512	/* link-drain shadow req */
 #define REQ_F_TIMEOUT		1024	/* timeout request */
+#define REQ_F_ISREG		2048	/* regular file */
+#define REQ_F_MUST_PUNT		4096	/* must be punted even for NONBLOCK */
 	u64			user_data;
 	u32			result;
 	u32			sequence;
@@ -914,26 +916,26 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
 	return ret;
 }
 
-static void kiocb_end_write(struct kiocb *kiocb)
+static void kiocb_end_write(struct io_kiocb *req)
 {
-	if (kiocb->ki_flags & IOCB_WRITE) {
-		struct inode *inode = file_inode(kiocb->ki_filp);
+	/*
+	 * Tell lockdep we inherited freeze protection from submission
+	 * thread.
+	 */
+	if (req->flags & REQ_F_ISREG) {
+		struct inode *inode = file_inode(req->file);
 
-		/*
-		 * Tell lockdep we inherited freeze protection from submission
-		 * thread.
-		 */
-		if (S_ISREG(inode->i_mode))
-			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
-		file_end_write(kiocb->ki_filp);
+		__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
 	}
+	file_end_write(req->file);
 }
 
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
 
-	kiocb_end_write(kiocb);
+	if (kiocb->ki_flags & IOCB_WRITE)
+		kiocb_end_write(req);
 
 	if ((req->flags & REQ_F_LINK) && res != req->result)
 		req->flags |= REQ_F_FAIL_LINK;
@@ -945,7 +947,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
 
-	kiocb_end_write(kiocb);
+	if (kiocb->ki_flags & IOCB_WRITE)
+		kiocb_end_write(req);
 
 	if ((req->flags & REQ_F_LINK) && res != req->result)
 		req->flags |= REQ_F_FAIL_LINK;
@@ -1059,8 +1062,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 	if (!req->file)
 		return -EBADF;
 
-	if (force_nonblock && !io_file_supports_async(req->file))
-		force_nonblock = false;
+	if (S_ISREG(file_inode(req->file)->i_mode))
+		req->flags |= REQ_F_ISREG;
+
+	/*
+	 * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
+	 * we know to async punt it even if it was opened O_NONBLOCK
+	 */
+	if (force_nonblock && !io_file_supports_async(req->file)) {
+		req->flags |= REQ_F_MUST_PUNT;
+		return -EAGAIN;
+	}
 
 	kiocb->ki_pos = READ_ONCE(sqe->off);
 	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
@@ -1081,7 +1093,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 		return ret;
 
 	/* don't allow async punt if RWF_NOWAIT was requested */
-	if (kiocb->ki_flags & IOCB_NOWAIT)
+	if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+	    (req->file->f_flags & O_NONBLOCK))
 		req->flags |= REQ_F_NOWAIT;
 
 	if (force_nonblock)
@@ -1382,7 +1395,9 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
 		 * need async punt anyway, so it's more efficient to do it
 		 * here.
 		 */
-		if (force_nonblock && ret2 > 0 && ret2 < read_size)
+		if (force_nonblock && !(req->flags & REQ_F_NOWAIT) &&
+		    (req->flags & REQ_F_ISREG) &&
+		    ret2 > 0 && ret2 < read_size)
 			ret2 = -EAGAIN;
 		/* Catch -EAGAIN return for forced non-blocking submission */
 		if (!force_nonblock || ret2 != -EAGAIN) {
@@ -1447,7 +1462,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
 		 * released so that it doesn't complain about the held lock when
 		 * we return to userspace.
 		 */
-		if (S_ISREG(file_inode(file)->i_mode)) {
+		if (req->flags & REQ_F_ISREG) {
 			__sb_start_write(file_inode(file)->i_sb,
 						SB_FREEZE_WRITE, true);
 			__sb_writers_release(file_inode(file)->i_sb,
@@ -2282,7 +2297,13 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	int ret;
 
 	ret = __io_submit_sqe(ctx, req, s, force_nonblock);
-	if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
+
+	/*
+	 * We async punt it if the file wasn't marked NOWAIT, or if the file
+	 * doesn't support non-blocking read/write attempts
+	 */
+	if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
+	    (req->flags & REQ_F_MUST_PUNT))) {
 		struct io_uring_sqe *sqe_copy;
 
 		sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);