author    Ken Chen <kenchen@google.com>    2007-02-03 04:13:45 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org>    2007-02-03 14:26:06 -0500
commit    dee11c2364f51cac53df17d742a0c69097e29a4e (patch)
tree      339b45f1dbd99a22bbe161ebeb5b7cd8850289ba /fs
parent    3e8219806c33b64a00b0013f96f735451f30c64c (diff)
[PATCH] aio: fix buggy put_ioctx call in aio_complete - v2
An AIO bug was reported: a sleeping function is being called from softirq context:

  BUG: warning at kernel/mutex.c:132/__mutex_lock_common()
  Call Trace:
   [<a000000100577b00>] __mutex_lock_slowpath+0x640/0x6c0
   [<a000000100577ba0>] mutex_lock+0x20/0x40
   [<a0000001000a25b0>] flush_workqueue+0xb0/0x1a0
   [<a00000010018c0c0>] __put_ioctx+0xc0/0x240
   [<a00000010018d470>] aio_complete+0x2f0/0x420
   [<a00000010019cc80>] finished_one_bio+0x200/0x2a0
   [<a00000010019d1c0>] dio_bio_complete+0x1c0/0x200
   [<a00000010019d260>] dio_bio_end_aio+0x60/0x80
   [<a00000010014acd0>] bio_endio+0x110/0x1c0
   [<a0000001002770e0>] __end_that_request_first+0x180/0xba0
   [<a000000100277b90>] end_that_request_chunk+0x30/0x60
   [<a0000002073c0c70>] scsi_end_request+0x50/0x300 [scsi_mod]
   [<a0000002073c1240>] scsi_io_completion+0x200/0x8a0 [scsi_mod]
   [<a0000002074729b0>] sd_rw_intr+0x330/0x860 [sd_mod]
   [<a0000002073b3ac0>] scsi_finish_command+0x100/0x1c0 [scsi_mod]
   [<a0000002073c2910>] scsi_softirq_done+0x230/0x300 [scsi_mod]
   [<a000000100277d20>] blk_done_softirq+0x160/0x1c0
   [<a000000100083e00>] __do_softirq+0x200/0x240
   [<a000000100083eb0>] do_softirq+0x70/0xc0

See report: http://marc.theaimsgroup.com/?l=linux-kernel&m=116599593200888&w=2

flush_workqueue() is not allowed to be called in softirq context. However, aio_complete(), called from an I/O interrupt, can drop the last reference on the ioctx via put_ioctx() and trigger the bug. It is simply incorrect to perform the ioctx freeing from aio_complete().

The bug is triggerable from a race between io_destroy() and aio_complete(). A possible scenario:

  cpu0                                cpu1
  io_destroy                          aio_complete
    wait_for_all_aios {                 __aio_put_req
      ...                                  ctx->reqs_active--;
      if (!ctx->reqs_active)
        return;
    }
    ...
    put_ioctx(ioctx)
                                        put_ioctx(ctx);
                                           __put_ioctx
                                             bam! Bug trigger!

The real problem is that the check of ctx->reqs_active in wait_for_all_aios() is incorrect: access to reqs_active is not properly protected by the ctx spin lock. This patch adds that protective spin lock and, at the same time, removes the duplicate ref counting for each kiocb, since reqs_active already serves as a ref count for each active ioctx. This also ensures the buggy call to flush_workqueue() in softirq context is eliminated.

Signed-off-by: "Ken Chen" <kenchen@google.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: <stable@kernel.org>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
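For illustration only, the following is a minimal user-space sketch of the locking discipline the patch establishes in wait_for_all_aios(): the waiter may only test the in-flight counter while holding the context lock, and gives the lock up only while it is actually asleep. This is a pthread analogue, not the kernel code; the names (ctx_lock, ctx_wait, reqs_active, completer, wait_for_all) are illustrative stand-ins.

/* Build with: cc -pthread wait_sketch.c -o wait_sketch */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  ctx_wait = PTHREAD_COND_INITIALIZER;
static int reqs_active = 4;            /* pretend four requests are in flight */

/* stand-in for the completion path: retire one request under the lock */
static void *completer(void *unused)
{
        for (;;) {
                usleep(1000);
                pthread_mutex_lock(&ctx_lock);
                if (reqs_active == 0) {
                        pthread_mutex_unlock(&ctx_lock);
                        return NULL;
                }
                if (--reqs_active == 0)
                        pthread_cond_broadcast(&ctx_wait);
                pthread_mutex_unlock(&ctx_lock);
        }
}

/* stand-in for wait_for_all_aios(): check and sleep under the same lock */
static void wait_for_all(void)
{
        pthread_mutex_lock(&ctx_lock);
        while (reqs_active)
                pthread_cond_wait(&ctx_wait, &ctx_lock); /* drops lock only while asleep */
        pthread_mutex_unlock(&ctx_lock);
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, completer, NULL);
        wait_for_all();                 /* the io_destroy() side */
        pthread_join(t, NULL);
        printf("all requests drained, safe to free the context\n");
        return 0;
}

Because the counter is never read outside the lock, the waiter cannot observe a stale non-zero value and return early while a completer is still about to drop the last reference, which is the essence of the race described above.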
Diffstat (limited to 'fs')
-rw-r--r--    fs/aio.c    20
1 file changed, 9 insertions(+), 11 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index ee20fc4240e0..55991e4132a7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -298,17 +298,23 @@ static void wait_for_all_aios(struct kioctx *ctx)
 	struct task_struct *tsk = current;
 	DECLARE_WAITQUEUE(wait, tsk);
 
+	spin_lock_irq(&ctx->ctx_lock);
 	if (!ctx->reqs_active)
-		return;
+		goto out;
 
 	add_wait_queue(&ctx->wait, &wait);
 	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 	while (ctx->reqs_active) {
+		spin_unlock_irq(&ctx->ctx_lock);
 		schedule();
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+		spin_lock_irq(&ctx->ctx_lock);
 	}
 	__set_task_state(tsk, TASK_RUNNING);
 	remove_wait_queue(&ctx->wait, &wait);
+
+out:
+	spin_unlock_irq(&ctx->ctx_lock);
 }
 
 /* wait_on_sync_kiocb:
@@ -424,7 +430,6 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
 	ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
 	if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
 		list_add(&req->ki_list, &ctx->active_reqs);
-		get_ioctx(ctx);
 		ctx->reqs_active++;
 		okay = 1;
 	}
@@ -536,8 +541,6 @@ int fastcall aio_put_req(struct kiocb *req)
 	spin_lock_irq(&ctx->ctx_lock);
 	ret = __aio_put_req(ctx, req);
 	spin_unlock_irq(&ctx->ctx_lock);
-	if (ret)
-		put_ioctx(ctx);
 	return ret;
 }
 
@@ -779,8 +782,7 @@ static int __aio_run_iocbs(struct kioctx *ctx)
 		 */
 		iocb->ki_users++;	/* grab extra reference */
 		aio_run_iocb(iocb);
-		if (__aio_put_req(ctx, iocb))	/* drop extra ref */
-			put_ioctx(ctx);
+		__aio_put_req(ctx, iocb);
 	}
 	if (!list_empty(&ctx->run_list))
 		return 1;
@@ -997,14 +999,10 @@ put_rq:
 	/* everything turned out well, dispose of the aiocb. */
 	ret = __aio_put_req(ctx, iocb);
 
-	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
 
-	if (ret)
-		put_ioctx(ctx);
-
+	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	return ret;
 }
 