Diffstat (limited to 'fs/io_uring.c')

 -rw-r--r--  fs/io_uring.c | 82
 1 file changed, 65 insertions(+), 17 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e2a66e12fbc6..d542f1cf4428 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -202,7 +202,7 @@ struct async_list {
 
 	struct file		*file;
 	off_t			io_end;
-	size_t			io_pages;
+	size_t			io_len;
 };
 
 struct io_ring_ctx {
@@ -333,7 +333,8 @@ struct io_kiocb {
 #define REQ_F_IO_DRAIN		16	/* drain existing IO first */
 #define REQ_F_IO_DRAINED	32	/* drain done */
 #define REQ_F_LINK		64	/* linked sqes */
-#define REQ_F_FAIL_LINK		128	/* fail rest of links */
+#define REQ_F_LINK_DONE		128	/* linked sqes done */
+#define REQ_F_FAIL_LINK		256	/* fail rest of links */
 	u64			user_data;
 	u32			result;
 	u32			sequence;
@@ -429,7 +430,7 @@ static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
 	if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
 		return false;
 
-	return req->sequence > ctx->cached_cq_tail + ctx->sq_ring->dropped;
+	return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped;
 }
 
 static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
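
[Editor's note] The switch from `>` to `!=` matters because `sequence` and `cached_cq_tail` are 32-bit counters (see the `u32 sequence` field above) and can wrap: an ordered comparison misjudges deferral once one side wraps past UINT32_MAX, while an equality test keeps deferring until the completion tail catches up exactly. A standalone sketch of the wraparound hazard, with made-up counter values (not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t seq = 5;		/* sequence that wrapped past UINT32_MAX */
		uint32_t cq_tail = 4294967290u;	/* completion tail, not yet wrapped */

		/* ordered compare: claims the request need not wait after the wrap */
		printf("seq > tail:  %d\n", seq > cq_tail);	/* prints 0 */
		/* equality compare: keeps deferring until the tail catches up */
		printf("seq != tail: %d\n", seq != cq_tail);	/* prints 1 */
		return 0;
	}
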
@@ -632,6 +633,7 @@ static void io_req_link_next(struct io_kiocb *req)
 			nxt->flags |= REQ_F_LINK;
 		}
 
+		nxt->flags |= REQ_F_LINK_DONE;
 		INIT_WORK(&nxt->work, io_sq_wq_submit_work);
 		queue_work(req->ctx->sqo_wq, &nxt->work);
 	}
@@ -1064,8 +1066,44 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
 	 */
 	offset = buf_addr - imu->ubuf;
 	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
-	if (offset)
-		iov_iter_advance(iter, offset);
+
+	if (offset) {
+		/*
+		 * Don't use iov_iter_advance() here, as it's really slow for
+		 * using the latter parts of a big fixed buffer - it iterates
+		 * over each segment manually. We can cheat a bit here, because
+		 * we know that:
+		 *
+		 * 1) it's a BVEC iter, we set it up
+		 * 2) all bvecs are PAGE_SIZE in size, except potentially the
+		 *    first and last bvec
+		 *
+		 * So just find our index, and adjust the iterator afterwards.
+		 * If the offset is within the first bvec (or the whole first
+		 * bvec, just use iov_iter_advance(). This makes it easier
+		 * since we can just skip the first segment, which may not
+		 * be PAGE_SIZE aligned.
+		 */
+		const struct bio_vec *bvec = imu->bvec;
+
+		if (offset <= bvec->bv_len) {
+			iov_iter_advance(iter, offset);
+		} else {
+			unsigned long seg_skip;
+
+			/* skip first vec */
+			offset -= bvec->bv_len;
+			seg_skip = 1 + (offset >> PAGE_SHIFT);
+
+			iter->bvec = bvec + seg_skip;
+			iter->nr_segs -= seg_skip;
+			iter->count -= (seg_skip << PAGE_SHIFT);
+			iter->iov_offset = offset & ~PAGE_MASK;
+			if (iter->iov_offset)
+				iter->count -= iter->iov_offset;
+		}
+	}
+
 	return 0;
 }
 
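[Editor's note] The fast-forward above leans on the registered buffer's layout: every bvec except the first and last spans a full page, so the target segment can be computed directly instead of walking the iterator one segment at a time. A userspace sketch of the same index arithmetic, with a hypothetical short first segment (not the kernel code):

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)
	#define PAGE_MASK	(~(PAGE_SIZE - 1))

	int main(void)
	{
		/* hypothetical layout: 2048-byte first bvec, 4 KiB pages after */
		unsigned long first_len = 2048;
		unsigned long offset = first_len + 3 * PAGE_SIZE + 100;

		if (offset <= first_len) {
			printf("within first segment: advance %lu\n", offset);
		} else {
			unsigned long seg_skip;

			offset -= first_len;			/* skip first vec */
			seg_skip = 1 + (offset >> PAGE_SHIFT);	/* 1 + 3 = 4 */

			printf("skip %lu segments, intra-page offset %lu\n",
			       seg_skip, offset & ~PAGE_MASK);	/* 4 and 100 */
		}
		return 0;
	}
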
@@ -1120,28 +1158,26 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
 	off_t io_end = kiocb->ki_pos + len;
 
 	if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) {
-		unsigned long max_pages;
+		unsigned long max_bytes;
 
 		/* Use 8x RA size as a decent limiter for both reads/writes */
-		max_pages = filp->f_ra.ra_pages;
-		if (!max_pages)
-			max_pages = VM_READAHEAD_PAGES;
-		max_pages *= 8;
-
-		/* If max pages are exceeded, reset the state */
-		len >>= PAGE_SHIFT;
-		if (async_list->io_pages + len <= max_pages) {
+		max_bytes = filp->f_ra.ra_pages << (PAGE_SHIFT + 3);
+		if (!max_bytes)
+			max_bytes = VM_READAHEAD_PAGES << (PAGE_SHIFT + 3);
+
+		/* If max len are exceeded, reset the state */
+		if (async_list->io_len + len <= max_bytes) {
 			req->flags |= REQ_F_SEQ_PREV;
-			async_list->io_pages += len;
+			async_list->io_len += len;
 		} else {
 			io_end = 0;
-			async_list->io_pages = 0;
+			async_list->io_len = 0;
 		}
 	}
 
 	/* New file? Reset state. */
 	if (async_list->file != filp) {
-		async_list->io_pages = 0;
+		async_list->io_len = 0;
 		async_list->file = filp;
 	}
 	async_list->io_end = io_end;
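
[Editor's note] Two things change together here: the limiter now accounts in bytes (`io_len`) instead of pages, and the old `max_pages *= 8` is folded into the shift. The byte accounting fixes a real hole: the removed `len >>= PAGE_SHIFT` truncated sub-page requests to zero pages, so short sequential IOs never advanced the counter toward the limit. A quick check that the folded shift is the same limit, assuming 4 KiB pages:

	#include <stdio.h>

	#define PAGE_SHIFT	12

	int main(void)
	{
		unsigned long ra_pages = 32;	/* a typical 128 KiB readahead window */

		/* old: 8x the window in pages, converted to bytes afterwards */
		unsigned long old_limit = (ra_pages * 8) << PAGE_SHIFT;
		/* new: one shift does both the x8 and the pages-to-bytes step */
		unsigned long new_limit = ra_pages << (PAGE_SHIFT + 3);

		printf("%lu == %lu\n", old_limit, new_limit);	/* both 1048576 */
		return 0;
	}
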
@@ -1630,6 +1666,8 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	INIT_LIST_HEAD(&poll->wait.entry);
 	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
 
+	INIT_LIST_HEAD(&req->list);
+
 	mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
 
 	spin_lock_irq(&ctx->completion_lock);
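
[Editor's note] The added INIT_LIST_HEAD() appears to guard the case where the poll request completes without ever being queued on a list: a later list_del_init() would then operate on uninitialized pointers and scribble over memory, whereas deleting a self-linked node only touches itself. A minimal demonstration with simplified userspace stand-ins for the kernel list helpers (assumed semantics, not the kernel code):

	#include <stdio.h>

	struct list_head { struct list_head *next, *prev; };

	static void INIT_LIST_HEAD(struct list_head *l) { l->next = l->prev = l; }

	static void list_del_init(struct list_head *l)
	{
		/* unlink from neighbours, then point back at self */
		l->next->prev = l->prev;
		l->prev->next = l->next;
		INIT_LIST_HEAD(l);
	}

	int main(void)
	{
		struct list_head req_list;	/* models req->list */

		INIT_LIST_HEAD(&req_list);	/* self-linked: next == prev == &req_list */
		list_del_init(&req_list);	/* safe even though never added anywhere */
		/* without the init, next/prev would hold stack garbage and the
		 * pointer writes above would corrupt random memory */
		printf("ok: %d\n", req_list.next == &req_list);	/* prints 1 */
		return 0;
	}
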
@@ -1800,6 +1838,7 @@ restart:
 	do {
 		struct sqe_submit *s = &req->submit;
 		const struct io_uring_sqe *sqe = s->sqe;
+		unsigned int flags = req->flags;
 
 		/* Ensure we clear previously set non-block flag */
 		req->rw.ki_flags &= ~IOCB_NOWAIT;
@@ -1844,6 +1883,10 @@ restart:
 		/* async context always use a copy of the sqe */
 		kfree(sqe);
 
+		/* req from defer and link list needn't decrease async cnt */
+		if (flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE))
+			goto out;
+
 		if (!async_list)
 			break;
 		if (!list_empty(&req_list)) {
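
[Editor's note] Note the test reads the `flags` snapshot taken at the top of the loop, presumably because the request may already have been completed and freed by this point, so `req->flags` is no longer safe to touch. The accounting rationale, per the comment: requests arriving via the drain or link path were queued straight to the workqueue without ever incrementing `async_list->cnt`, so letting them fall through to the list-draining code below would pair a decrement with no increment. A toy model of that imbalance (hypothetical helpers, not kernel code):

	#include <stdio.h>

	int cnt;	/* models async_list->cnt */

	void queue_counted(void)   { cnt++; }	/* normal async path: inc on queue */
	void queue_uncounted(void) { }		/* drain/link path: no inc */
	void worker_done(void)     { cnt--; }	/* worker decs when it finishes */

	int main(void)
	{
		queue_counted();   worker_done();	/* balanced: cnt == 0 */
		queue_uncounted(); worker_done();	/* unbalanced: cnt == -1 */
		printf("cnt = %d\n", cnt);
		return 0;
	}
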
@@ -1891,6 +1934,7 @@ restart:
 		}
 	}
 
+out:
 	if (cur_mm) {
 		set_fs(old_fs);
 		unuse_mm(cur_mm);
@@ -1917,6 +1961,10 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
 	ret = true;
 	spin_lock(&list->lock);
 	list_add_tail(&req->list, &list->list);
+	/*
+	 * Ensure we see a simultaneous modification from io_sq_wq_submit_work()
+	 */
+	smp_mb();
 	if (!atomic_read(&list->cnt)) {
 		list_del_init(&req->list);
 		ret = false;
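
[Editor's note] The comment names the pairing: the full barrier orders the list_add_tail() before the atomic_read(), and is matched on the io_sq_wq_submit_work() side by the fully ordered counter decrement preceding its re-check of the list, so the two sides cannot both miss each other's update. A compilable userspace model of the two racing sides, with C11 atomics standing in for the kernel primitives and the worker half reconstructed as a sketch rather than copied from the kernel:

	#include <stdatomic.h>
	#include <stdbool.h>

	atomic_int cnt;		/* models async_list->cnt */
	atomic_bool queued;	/* models "req is on list->list" */

	bool submitter_add(void)	/* models io_add_to_prev_work() */
	{
		atomic_store(&queued, true);			/* list_add_tail() */
		atomic_thread_fence(memory_order_seq_cst);	/* the smp_mb() */
		if (atomic_load(&cnt) == 0) {
			atomic_store(&queued, false);		/* list_del_init() */
			return false;	/* no worker left: caller queues its own work */
		}
		return true;	/* a live worker will pick the request up */
	}

	void worker_retire(void)	/* sketch of the io_sq_wq_submit_work() side */
	{
		/* the atomic RMW is fully ordered, so the re-check below
		 * cannot be reordered before the decrement */
		if (atomic_fetch_sub(&cnt, 1) == 1 && atomic_load(&queued))
			atomic_fetch_add(&cnt, 1);	/* raced: keep processing */
	}

Without the fence in submitter_add(), the store to `queued` and the load of `cnt` could be reordered, letting the submitter see a live worker while the worker sees an empty list; both then assume the other handles the request and it is lost.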