author		Anatol Pomozov <anatol.pomozov@gmail.com>	2014-04-15 14:31:33 -0400
committer	Benjamin LaHaise <bcrl@kvack.org>	2014-04-16 13:38:04 -0400
commit		e02ba72aabfade4c9cd6e3263e9b57bf890ad25c (patch)
tree		153f7763591a0cb4497d3b72d9ca644d2f3d4aae /fs
parent		10ec34fcb100412ab186c141a9c3557d1270effd (diff)
aio: block io_destroy() until all context requests are completed
io_destroy() deletes the aio context and all resources related to it. It
makes sense that no IO operations connected to the context should be
running after the context is destroyed. And since the context is gone, we
have no way to get request status or to call io_getevents() anymore.

The man page for io_destroy() says that this function may block until all
of the context's requests are completed. Before kernel 3.11 io_destroy()
did indeed block, but since the aio refactoring in 3.11 this is no longer
true.

Here is pseudo-code that shows a testcase for the race condition
discovered in 3.11:

  initialize io_context
  io_submit(read to buffer)
  io_destroy()
  // context is destroyed so we can free the resources
  free(buffers);
  // if the buffer is reallocated by some other user, they will be
  // surprised to learn that the buffer is still being filled by an
  // outstanding operation from the destroyed io_context

The fix is straightforward: add a completion struct and wait on it in
io_destroy(); complete() is called when the number of in-flight requests
reaches zero.

If two or more io_destroy() calls race on the same context, only the
first one waits for IO completion; the behaviour of the other calls is
undefined.

Tested: ran the http://pastebin.com/LrPsQ4RL testcase for several hours
and no longer see the race condition.

Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
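To make the pseudo-code concrete, below is a minimal user-space sketch of
the same racy sequence, written against the libaio wrappers (link with
-laio). The file path, buffer size, and error handling are illustrative
assumptions, not part of the original testcase:

  #define _GNU_SOURCE
  #include <libaio.h>
  #include <fcntl.h>
  #include <stdlib.h>
  #include <unistd.h>

  int main(void)
  {
  	io_context_t ctx = 0;
  	struct iocb cb, *cbs[1] = { &cb };
  	void *buf;
  	int fd;

  	fd = open("/tmp/testfile", O_RDONLY | O_DIRECT);  /* hypothetical file */
  	if (fd < 0 || posix_memalign(&buf, 512, 4096))
  		return 1;

  	if (io_setup(1, &ctx))                   /* initialize io_context */
  		return 1;
  	io_prep_pread(&cb, fd, buf, 4096, 0);    /* read into buffer */
  	if (io_submit(ctx, 1, cbs) != 1)         /* io_submit(read to buffer) */
  		return 1;

  	/* On an unfixed kernel io_destroy() can return while the read is
  	 * still in flight, so the kernel may keep writing into buf. */
  	io_destroy(ctx);

  	/* ...which makes this free() the unsafe step from the pseudo-code:
  	 * the memory can be reused while the outstanding read from the
  	 * destroyed context still targets it. */
  	free(buf);
  	close(fd);
  	return 0;
  }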
Diffstat (limited to 'fs')
-rw-r--r--	fs/aio.c	36
1 file changed, 32 insertions(+), 4 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 12a3de0ee6da..2adbb0398ab9 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -112,6 +112,11 @@ struct kioctx {
 
 	struct work_struct	free_work;
 
+	/*
+	 * signals when all in-flight requests are done
+	 */
+	struct completion *requests_done;
+
 	struct {
 		/*
 		 * This counts the number of available slots in the ringbuffer,
@@ -508,6 +513,10 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
 
+	/* At this point we know that there are no any in-flight requests */
+	if (ctx->requests_done)
+		complete(ctx->requests_done);
+
 	INIT_WORK(&ctx->free_work, free_ioctx);
 	schedule_work(&ctx->free_work);
 }
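For readers unfamiliar with <linux/completion.h>: complete() wakes a thread
blocked in wait_for_completion(), which is exactly the handshake
free_ioctx_reqs() now performs for io_destroy(). A minimal, hypothetical
module sketch of the same pattern, with a work item standing in for the
last in-flight request:

  #include <linux/module.h>
  #include <linux/completion.h>
  #include <linux/workqueue.h>

  static struct completion requests_done;
  static struct work_struct work;

  static void last_request_done(struct work_struct *w)
  {
  	complete(&requests_done);	/* plays the role of free_ioctx_reqs() */
  }

  static int __init demo_init(void)
  {
  	init_completion(&requests_done);
  	INIT_WORK(&work, last_request_done);
  	schedule_work(&work);
  	/* blocks until the work item has run, as io_destroy() now does */
  	wait_for_completion(&requests_done);
  	return 0;
  }

  static void __exit demo_exit(void)
  {
  	flush_work(&work);
  }

  module_init(demo_init);
  module_exit(demo_exit);
  MODULE_LICENSE("GPL");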
@@ -718,7 +727,8 @@ err:
  * when the processes owning a context have all exited to encourage
  * the rapid destruction of the kioctx.
  */
-static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
+static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+		struct completion *requests_done)
 {
 	if (!atomic_xchg(&ctx->dead, 1)) {
 		struct kioctx_table *table;
@@ -747,7 +757,11 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
 		if (ctx->mmap_size)
 			vm_munmap(ctx->mmap_base, ctx->mmap_size);
 
+		ctx->requests_done = requests_done;
 		percpu_ref_kill(&ctx->users);
+	} else {
+		if (requests_done)
+			complete(requests_done);
 	}
 }
 
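Note that the atomic_xchg(&ctx->dead, 1) test above is what lets only the
first concurrent killer install requests_done; every later caller takes
the else branch and completes immediately. A user-space C11 sketch of this
first-caller-wins idiom (names are illustrative):

  #include <stdatomic.h>
  #include <stdbool.h>

  static atomic_int dead;

  /* Only the caller that swaps 0 -> 1 performs the teardown; all
   * concurrent callers observe 1 and back off, mirroring the
   * atomic_xchg(&ctx->dead, 1) test in kill_ioctx(). */
  static bool try_become_killer(void)
  {
  	return atomic_exchange(&dead, 1) == 0;
  }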
@@ -809,7 +823,7 @@ void exit_aio(struct mm_struct *mm)
 		 */
 		ctx->mmap_size = 0;
 
-		kill_ioctx(mm, ctx);
+		kill_ioctx(mm, ctx, NULL);
 	}
 }
 
@@ -1185,7 +1199,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
 		if (ret)
-			kill_ioctx(current->mm, ioctx);
+			kill_ioctx(current->mm, ioctx, NULL);
 		percpu_ref_put(&ioctx->users);
 	}
 
@@ -1203,8 +1217,22 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
-		kill_ioctx(current->mm, ioctx);
+		struct completion requests_done =
+			COMPLETION_INITIALIZER_ONSTACK(requests_done);
+
+		/* Pass requests_done to kill_ioctx() where it can be set
+		 * in a thread-safe way. If we try to set it here then we have
+		 * a race condition if two io_destroy() called simultaneously.
+		 */
+		kill_ioctx(current->mm, ioctx, &requests_done);
 		percpu_ref_put(&ioctx->users);
+
+		/* Wait until all IO for the context are done. Otherwise kernel
+		 * keep using user-space buffers even if user thinks the context
+		 * is destroyed.
+		 */
+		wait_for_completion(&requests_done);
+
 		return 0;
 	}
 	pr_debug("EINVAL: io_destroy: invalid context id\n");
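On kernels that predate this fix, user space can avoid the race itself by
draining the context before destroying it, since io_getevents() remains
usable until io_destroy() is called. A hedged libaio sketch of that
workaround (the function name and batch size are illustrative):

  #include <libaio.h>

  /* Reap all 'outstanding' requests before io_destroy() so that no
   * in-flight operation can still target our buffers. */
  static void drain_and_destroy(io_context_t ctx, long outstanding)
  {
  	struct io_event events[16];

  	while (outstanding > 0) {
  		int got = io_getevents(ctx, 1, 16, events, NULL);
  		if (got <= 0)
  			break;		/* give up on error */
  		outstanding -= got;
  	}
  	io_destroy(ctx);
  }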