aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Whitehouse <swhiteho@redhat.com>2006-12-14 13:24:26 -0500
committerSteven Whitehouse <swhiteho@redhat.com>2007-02-05 13:36:09 -0500
commitc7b3383437ff41781964d1bf7f40ff8d7dd5bc47 (patch)
treedbe0e6cb267bfe1dc5f52ee6c30529b55ac58a16
parent927255f0383342f5d49b82adb6689b9cba52a6f5 (diff)
[GFS2] Fix DIO deadlock
This patch fixes Red Hat bugzilla #212627 in which a deadlock occurs due to trying to take the i_mutex while holding a glock. The correct locking order is defined as i_mutex -> glock in all cases. I've left dealing with allocating writes. I know that we need to do that, but for now this should do the trick. We don't need to take the i_mutex on write, because the VFS has already taken it for us. On read we don't need it since the glock is enough protection. The reason that I've made some of the checks into a separate function is that we'll need to do the checks again in the allocating write case eventually, so this is partly in preparation for this. Likewise the return value test of != 1 might look a bit odd and thats because we'll need a third return value in case of requiring an allocation. I've made the change to deferred mode on the glock to ensure flushing read caches on other nodes. I notice that (using blktrace to look at whats going on) we appear to do a better job of large I/Os than ext3 after this patch (in terms of not splitting up the I/Os). Signed-off-by: Steven Whitehouse <swhiteho@redhat.com> Cc: Wendy Cheng <wcheng@redhat.com>
-rw-r--r--fs/gfs2/ops_address.c74
1 files changed, 45 insertions, 29 deletions
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index d8d69a72a10d..0118aa439c1d 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -594,6 +594,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
594 return; 594 return;
595} 595}
596 596
597/**
598 * gfs2_ok_for_dio - check that dio is valid on this file
599 * @ip: The inode
600 * @rw: READ or WRITE
601 * @offset: The offset at which we are reading or writing
602 *
603 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
604 * 1 (to accept the i/o request)
605 */
606static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
607{
608 /*
609 * Should we return an error here? I can't see that O_DIRECT for
610 * a journaled file makes any sense. For now we'll silently fall
611 * back to buffered I/O, likewise we do the same for stuffed
612 * files since they are (a) small and (b) unaligned.
613 */
614 if (gfs2_is_jdata(ip))
615 return 0;
616
617 if (gfs2_is_stuffed(ip))
618 return 0;
619
620 if (offset > i_size_read(&ip->i_inode))
621 return 0;
622 return 1;
623}
624
625
626
597static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, 627static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
598 const struct iovec *iov, loff_t offset, 628 const struct iovec *iov, loff_t offset,
599 unsigned long nr_segs) 629 unsigned long nr_segs)
@@ -604,42 +634,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
604 struct gfs2_holder gh; 634 struct gfs2_holder gh;
605 int rv; 635 int rv;
606 636
607 if (rw == READ)
608 mutex_lock(&inode->i_mutex);
609 /* 637 /*
610 * Shared lock, even if its a write, since we do no allocation 638 * Deferred lock, even if its a write, since we do no allocation
611 * on this path. All we need change is atime. 639 * on this path. All we need change is atime, and this lock mode
640 * ensures that other nodes have flushed their buffered read caches
641 * (i.e. their page cache entries for this inode). We do not,
642 * unfortunately have the option of only flushing a range like
643 * the VFS does.
612 */ 644 */
613 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 645 gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
614 rv = gfs2_glock_nq_atime(&gh); 646 rv = gfs2_glock_nq_atime(&gh);
615 if (rv) 647 if (rv)
616 goto out; 648 return rv;
617 649 rv = gfs2_ok_for_dio(ip, rw, offset);
618 if (offset > i_size_read(inode)) 650 if (rv != 1)
619 goto out; 651 goto out; /* dio not valid, fall back to buffered i/o */
620 652
621 /* 653 rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
622 * Should we return an error here? I can't see that O_DIRECT for 654 iov, offset, nr_segs,
623 * a journaled file makes any sense. For now we'll silently fall 655 gfs2_get_block_direct, NULL);
624 * back to buffered I/O, likewise we do the same for stuffed
625 * files since they are (a) small and (b) unaligned.
626 */
627 if (gfs2_is_jdata(ip))
628 goto out;
629
630 if (gfs2_is_stuffed(ip))
631 goto out;
632
633 rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
634 inode->i_sb->s_bdev,
635 iov, offset, nr_segs,
636 gfs2_get_block_direct, NULL);
637out: 656out:
638 gfs2_glock_dq_m(1, &gh); 657 gfs2_glock_dq_m(1, &gh);
639 gfs2_holder_uninit(&gh); 658 gfs2_holder_uninit(&gh);
640 if (rw == READ)
641 mutex_unlock(&inode->i_mutex);
642
643 return rv; 659 return rv;
644} 660}
645 661