aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_inode_item.c
diff options
context:
space:
mode:
authorDave Chinner <david@fromorbit.com>2010-02-01 18:13:42 -0500
committerDave Chinner <david@fromorbit.com>2010-02-01 18:13:42 -0500
commitd808f617ad00a413585b806de340feda5ad9a2da (patch)
treeed03d4d019a9d8b566ffd454e112e9fbce70bad8 /fs/xfs/xfs_inode_item.c
parentc854363e80b49dd04a4de18ebc379eb8c8806674 (diff)
xfs: Don't issue buffer IO direct from AIL push V2
All buffers logged into the AIL are marked as delayed write. When the AIL needs to push the buffer out, it issues an async write of the buffer. This means that IO patterns are dependent on the order of buffers in the AIL. Instead of flushing the buffer, promote the buffer in the delayed write list so that the next time the xfsbufd is run the buffer will be flushed by the xfsbufd. Return the state to the xfsaild that the buffer was promoted so that the xfsaild knows that it needs to cause the xfsbufd to run to flush the buffers that were promoted. Using the xfsbufd for issuing the IO allows us to dispatch all buffer IO from the one queue. This means that we can make much more enlightened decisions on what order to flush buffers to disk as we don't have multiple places issuing IO. Optimisations to xfsbufd will be in a future patch. Version 2 - kill XFS_ITEM_FLUSHING as it is now unused. Signed-off-by: Dave Chinner <david@fromorbit.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/xfs/xfs_inode_item.c')
-rw-r--r--fs/xfs/xfs_inode_item.c98
1 files changed, 15 insertions, 83 deletions
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 207553e8295..d4dc063111f 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -602,33 +602,20 @@ xfs_inode_item_trylock(
602 602
603 if (!xfs_iflock_nowait(ip)) { 603 if (!xfs_iflock_nowait(ip)) {
604 /* 604 /*
605 * If someone else isn't already trying to push the inode 605 * inode has already been flushed to the backing buffer,
606 * buffer, we get to do it. 606 * leave it locked in shared mode, pushbuf routine will
607 * unlock it.
607 */ 608 */
608 if (iip->ili_pushbuf_flag == 0) { 609 return XFS_ITEM_PUSHBUF;
609 iip->ili_pushbuf_flag = 1;
610#ifdef DEBUG
611 iip->ili_push_owner = current_pid();
612#endif
613 /*
614 * Inode is left locked in shared mode.
615 * Pushbuf routine gets to unlock it.
616 */
617 return XFS_ITEM_PUSHBUF;
618 } else {
619 /*
620 * We hold the AIL lock, so we must specify the
621 * NONOTIFY flag so that we won't double trip.
622 */
623 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
624 return XFS_ITEM_FLUSHING;
625 }
626 /* NOTREACHED */
627 } 610 }
628 611
629 /* Stale items should force out the iclog */ 612 /* Stale items should force out the iclog */
630 if (ip->i_flags & XFS_ISTALE) { 613 if (ip->i_flags & XFS_ISTALE) {
631 xfs_ifunlock(ip); 614 xfs_ifunlock(ip);
615 /*
616 * we hold the AIL lock - notify the unlock routine of this
617 * so it doesn't try to get the lock again.
618 */
632 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); 619 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
633 return XFS_ITEM_PINNED; 620 return XFS_ITEM_PINNED;
634 } 621 }
@@ -746,11 +733,8 @@ xfs_inode_item_committed(
746 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK 733 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
747 * failed to get the inode flush lock but did get the inode locked SHARED. 734 * failed to get the inode flush lock but did get the inode locked SHARED.
748 * Here we're trying to see if the inode buffer is incore, and if so whether it's 735 * Here we're trying to see if the inode buffer is incore, and if so whether it's
749 * marked delayed write. If that's the case, we'll initiate a bawrite on that 736 * marked delayed write. If that's the case, we'll promote it and that will
750 * buffer to expedite the process. 737 * allow the caller to write the buffer by triggering the xfsbufd to run.
751 *
752 * We aren't holding the AIL lock (or the flush lock) when this gets called,
753 * so it is inherently race-y.
754 */ 738 */
755STATIC void 739STATIC void
756xfs_inode_item_pushbuf( 740xfs_inode_item_pushbuf(
@@ -759,26 +743,16 @@ xfs_inode_item_pushbuf(
759 xfs_inode_t *ip; 743 xfs_inode_t *ip;
760 xfs_mount_t *mp; 744 xfs_mount_t *mp;
761 xfs_buf_t *bp; 745 xfs_buf_t *bp;
762 uint dopush;
763 746
764 ip = iip->ili_inode; 747 ip = iip->ili_inode;
765
766 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 748 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
767 749
768 /* 750 /*
769 * The ili_pushbuf_flag keeps others from
770 * trying to duplicate our effort.
771 */
772 ASSERT(iip->ili_pushbuf_flag != 0);
773 ASSERT(iip->ili_push_owner == current_pid());
774
775 /*
776 * If a flush is not in progress anymore, chances are that the 751 * If a flush is not in progress anymore, chances are that the
777 * inode was taken off the AIL. So, just get out. 752 * inode was taken off the AIL. So, just get out.
778 */ 753 */
779 if (completion_done(&ip->i_flush) || 754 if (completion_done(&ip->i_flush) ||
780 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { 755 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
781 iip->ili_pushbuf_flag = 0;
782 xfs_iunlock(ip, XFS_ILOCK_SHARED); 756 xfs_iunlock(ip, XFS_ILOCK_SHARED);
783 return; 757 return;
784 } 758 }
@@ -787,53 +761,12 @@ xfs_inode_item_pushbuf(
787 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, 761 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno,
788 iip->ili_format.ilf_len, XBF_TRYLOCK); 762 iip->ili_format.ilf_len, XBF_TRYLOCK);
789 763
790 if (bp != NULL) {
791 if (XFS_BUF_ISDELAYWRITE(bp)) {
792 /*
793 * We were racing with iflush because we don't hold
794 * the AIL lock or the flush lock. However, at this point,
795 * we have the buffer, and we know that it's dirty.
796 * So, it's possible that iflush raced with us, and
797 * this item is already taken off the AIL.
798 * If not, we can flush it async.
799 */
800 dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
801 !completion_done(&ip->i_flush));
802 iip->ili_pushbuf_flag = 0;
803 xfs_iunlock(ip, XFS_ILOCK_SHARED);
804
805 trace_xfs_inode_item_push(bp, _RET_IP_);
806
807 if (XFS_BUF_ISPINNED(bp))
808 xfs_log_force(mp, 0);
809
810 if (dopush) {
811 int error;
812 error = xfs_bawrite(mp, bp);
813 if (error)
814 xfs_fs_cmn_err(CE_WARN, mp,
815 "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p",
816 error, iip, bp);
817 } else {
818 xfs_buf_relse(bp);
819 }
820 } else {
821 iip->ili_pushbuf_flag = 0;
822 xfs_iunlock(ip, XFS_ILOCK_SHARED);
823 xfs_buf_relse(bp);
824 }
825 return;
826 }
827 /*
828 * We have to be careful about resetting pushbuf flag too early (above).
829 * Even though in theory we can do it as soon as we have the buflock,
830 * we don't want others to be doing work needlessly. They'll come to
831 * this function thinking that pushing the buffer is their
832 * responsibility only to find that the buffer is still locked by
833 * another doing the same thing
834 */
835 iip->ili_pushbuf_flag = 0;
836 xfs_iunlock(ip, XFS_ILOCK_SHARED); 764 xfs_iunlock(ip, XFS_ILOCK_SHARED);
765 if (!bp)
766 return;
767 if (XFS_BUF_ISDELAYWRITE(bp))
768 xfs_buf_delwri_promote(bp);
769 xfs_buf_relse(bp);
837 return; 770 return;
838} 771}
839 772
@@ -937,7 +870,6 @@ xfs_inode_item_init(
937 /* 870 /*
938 We have zeroed memory. No need ... 871 We have zeroed memory. No need ...
939 iip->ili_extents_buf = NULL; 872 iip->ili_extents_buf = NULL;
940 iip->ili_pushbuf_flag = 0;
941 */ 873 */
942 874
943 iip->ili_format.ilf_type = XFS_LI_INODE; 875 iip->ili_format.ilf_type = XFS_LI_INODE;