diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2006-12-14 13:24:26 -0500 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-02-05 13:36:09 -0500 |
commit | c7b3383437ff41781964d1bf7f40ff8d7dd5bc47 (patch) | |
tree | dbe0e6cb267bfe1dc5f52ee6c30529b55ac58a16 | |
parent | 927255f0383342f5d49b82adb6689b9cba52a6f5 (diff) |
[GFS2] Fix DIO deadlock
This patch fixes Red Hat bugzilla #212627 in which a deadlock occurs
due to trying to take the i_mutex while holding a glock. The correct
locking order is defined as i_mutex -> glock in all cases.
I've left dealing with allocating writes. I know that we need to do
that, but for now this should do the trick. We don't need to take the
i_mutex on write, because the VFS has already taken it for us. On read
we don't need it since the glock is enough protection. The reason that
I've made some of the checks into a separate function is that we'll need
to do the checks again in the allocating write case eventually, so this
is partly in preparation for this. Likewise the return value test of !=
1 might look a bit odd and thats because we'll need a third return value
in case of requiring an allocation.
I've made the change to deferred mode on the glock to ensure flushing
read caches on other nodes. I notice that (using blktrace to look at
whats going on) we appear to do a better job of large I/Os than ext3
after this patch (in terms of not splitting up the I/Os).
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Wendy Cheng <wcheng@redhat.com>
-rw-r--r-- | fs/gfs2/ops_address.c | 74 |
1 files changed, 45 insertions, 29 deletions
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index d8d69a72a10d..0118aa439c1d 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -594,6 +594,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset) | |||
594 | return; | 594 | return; |
595 | } | 595 | } |
596 | 596 | ||
597 | /** | ||
598 | * gfs2_ok_for_dio - check that dio is valid on this file | ||
599 | * @ip: The inode | ||
600 | * @rw: READ or WRITE | ||
601 | * @offset: The offset at which we are reading or writing | ||
602 | * | ||
603 | * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) | ||
604 | * 1 (to accept the i/o request) | ||
605 | */ | ||
606 | static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | ||
607 | { | ||
608 | /* | ||
609 | * Should we return an error here? I can't see that O_DIRECT for | ||
610 | * a journaled file makes any sense. For now we'll silently fall | ||
611 | * back to buffered I/O, likewise we do the same for stuffed | ||
612 | * files since they are (a) small and (b) unaligned. | ||
613 | */ | ||
614 | if (gfs2_is_jdata(ip)) | ||
615 | return 0; | ||
616 | |||
617 | if (gfs2_is_stuffed(ip)) | ||
618 | return 0; | ||
619 | |||
620 | if (offset > i_size_read(&ip->i_inode)) | ||
621 | return 0; | ||
622 | return 1; | ||
623 | } | ||
624 | |||
625 | |||
626 | |||
597 | static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | 627 | static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, |
598 | const struct iovec *iov, loff_t offset, | 628 | const struct iovec *iov, loff_t offset, |
599 | unsigned long nr_segs) | 629 | unsigned long nr_segs) |
@@ -604,42 +634,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
604 | struct gfs2_holder gh; | 634 | struct gfs2_holder gh; |
605 | int rv; | 635 | int rv; |
606 | 636 | ||
607 | if (rw == READ) | ||
608 | mutex_lock(&inode->i_mutex); | ||
609 | /* | 637 | /* |
610 | * Shared lock, even if its a write, since we do no allocation | 638 | * Deferred lock, even if its a write, since we do no allocation |
611 | * on this path. All we need change is atime. | 639 | * on this path. All we need change is atime, and this lock mode |
640 | * ensures that other nodes have flushed their buffered read caches | ||
641 | * (i.e. their page cache entries for this inode). We do not, | ||
642 | * unfortunately have the option of only flushing a range like | ||
643 | * the VFS does. | ||
612 | */ | 644 | */ |
613 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 645 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); |
614 | rv = gfs2_glock_nq_atime(&gh); | 646 | rv = gfs2_glock_nq_atime(&gh); |
615 | if (rv) | 647 | if (rv) |
616 | goto out; | 648 | return rv; |
617 | 649 | rv = gfs2_ok_for_dio(ip, rw, offset); | |
618 | if (offset > i_size_read(inode)) | 650 | if (rv != 1) |
619 | goto out; | 651 | goto out; /* dio not valid, fall back to buffered i/o */ |
620 | 652 | ||
621 | /* | 653 | rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, |
622 | * Should we return an error here? I can't see that O_DIRECT for | 654 | iov, offset, nr_segs, |
623 | * a journaled file makes any sense. For now we'll silently fall | 655 | gfs2_get_block_direct, NULL); |
624 | * back to buffered I/O, likewise we do the same for stuffed | ||
625 | * files since they are (a) small and (b) unaligned. | ||
626 | */ | ||
627 | if (gfs2_is_jdata(ip)) | ||
628 | goto out; | ||
629 | |||
630 | if (gfs2_is_stuffed(ip)) | ||
631 | goto out; | ||
632 | |||
633 | rv = blockdev_direct_IO_own_locking(rw, iocb, inode, | ||
634 | inode->i_sb->s_bdev, | ||
635 | iov, offset, nr_segs, | ||
636 | gfs2_get_block_direct, NULL); | ||
637 | out: | 656 | out: |
638 | gfs2_glock_dq_m(1, &gh); | 657 | gfs2_glock_dq_m(1, &gh); |
639 | gfs2_holder_uninit(&gh); | 658 | gfs2_holder_uninit(&gh); |
640 | if (rw == READ) | ||
641 | mutex_unlock(&inode->i_mutex); | ||
642 | |||
643 | return rv; | 659 | return rv; |
644 | } | 660 | } |
645 | 661 | ||