aboutsummaryrefslogtreecommitdiffstats
path: root/fs/aio.c
diff options
context:
space:
mode:
author Zach Brown <zach.brown@oracle.com> 2006-12-29 19:47:02 -0500
committer Linus Torvalds <torvalds@woody.osdl.org> 2006-12-30 13:55:54 -0500
commit 1ebb1101c556b1915ff041655e629a072e64dcda (patch)
tree d306821837152a8c772591a9bf136e5e091a7a31 /fs/aio.c
parent 43cdff92ad47e0ca024c8a07d29f9bb6119e759c (diff)
[PATCH] Fix lock inversion aio_kick_handler()
lockdep found an AB-BC-CA lock inversion in retry-based AIO:

1) The task struct's alloc_lock (A) is acquired in process context with interrupts enabled. An interrupt might arrive and call wake_up(), which grabs the wait queue's q->lock (B).

2) When performing retry-based AIO, the AIO core registers aio_wake_function() as the wake function for iocb->ki_wait. It is called with the wait queue's q->lock (B) held and then tries to add the iocb to the run list after acquiring the ctx_lock (C).

3) aio_kick_handler() holds the ctx_lock (C) while acquiring the alloc_lock (A) via task_lock() in unuse_mm().

Lockdep emits a warning saying that we're trying to connect the irq-safe q->lock to the irq-unsafe alloc_lock via ctx_lock.

This fixes the inversion by calling unuse_mm() in the AIO kick handling path after we've released the ctx_lock. As Ben LaHaise pointed out, __put_ioctx could set ctx->mm to NULL, so we must only access ctx->mm while we hold the lock.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com>
Acked-by: Benjamin LaHaise <bcrl@kvack.org>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs/aio.c')
-rw-r--r--fs/aio.c7
1 files changed, 3 insertions, 4 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 5f577a63bdf0..ee20fc4240e0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -599,9 +599,6 @@ static void use_mm(struct mm_struct *mm)
599 * by the calling kernel thread 599 * by the calling kernel thread
600 * (Note: this routine is intended to be called only 600 * (Note: this routine is intended to be called only
601 * from a kernel thread context) 601 * from a kernel thread context)
602 *
603 * Comments: Called with ctx->ctx_lock held. This nests
604 * task_lock instead ctx_lock.
605 */ 602 */
606static void unuse_mm(struct mm_struct *mm) 603static void unuse_mm(struct mm_struct *mm)
607{ 604{
@@ -850,14 +847,16 @@ static void aio_kick_handler(struct work_struct *work)
850{ 847{
851 struct kioctx *ctx = container_of(work, struct kioctx, wq.work); 848 struct kioctx *ctx = container_of(work, struct kioctx, wq.work);
852 mm_segment_t oldfs = get_fs(); 849 mm_segment_t oldfs = get_fs();
850 struct mm_struct *mm;
853 int requeue; 851 int requeue;
854 852
855 set_fs(USER_DS); 853 set_fs(USER_DS);
856 use_mm(ctx->mm); 854 use_mm(ctx->mm);
857 spin_lock_irq(&ctx->ctx_lock); 855 spin_lock_irq(&ctx->ctx_lock);
858 requeue =__aio_run_iocbs(ctx); 856 requeue =__aio_run_iocbs(ctx);
859 unuse_mm(ctx->mm); 857 mm = ctx->mm;
860 spin_unlock_irq(&ctx->ctx_lock); 858 spin_unlock_irq(&ctx->ctx_lock);
859 unuse_mm(mm);
861 set_fs(oldfs); 860 set_fs(oldfs);
862 /* 861 /*
863 * we're in a worker thread already, don't use queue_delayed_work, 862 * we're in a worker thread already, don't use queue_delayed_work,