diff options
Diffstat (limited to 'fs/xfs/xfs_inode_item.c')
-rw-r--r-- | fs/xfs/xfs_inode_item.c | 146 |
1 files changed, 44 insertions, 102 deletions
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 9794b876d6ff..7bfea8540159 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include "xfs_ialloc.h" | 41 | #include "xfs_ialloc.h" |
42 | #include "xfs_rw.h" | 42 | #include "xfs_rw.h" |
43 | #include "xfs_error.h" | 43 | #include "xfs_error.h" |
44 | #include "xfs_trace.h" | ||
44 | 45 | ||
45 | 46 | ||
46 | kmem_zone_t *xfs_ili_zone; /* inode log item zone */ | 47 | kmem_zone_t *xfs_ili_zone; /* inode log item zone */ |
@@ -227,7 +228,7 @@ xfs_inode_item_format( | |||
227 | 228 | ||
228 | vecp->i_addr = (xfs_caddr_t)&iip->ili_format; | 229 | vecp->i_addr = (xfs_caddr_t)&iip->ili_format; |
229 | vecp->i_len = sizeof(xfs_inode_log_format_t); | 230 | vecp->i_len = sizeof(xfs_inode_log_format_t); |
230 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); | 231 | vecp->i_type = XLOG_REG_TYPE_IFORMAT; |
231 | vecp++; | 232 | vecp++; |
232 | nvecs = 1; | 233 | nvecs = 1; |
233 | 234 | ||
@@ -278,7 +279,7 @@ xfs_inode_item_format( | |||
278 | 279 | ||
279 | vecp->i_addr = (xfs_caddr_t)&ip->i_d; | 280 | vecp->i_addr = (xfs_caddr_t)&ip->i_d; |
280 | vecp->i_len = sizeof(struct xfs_icdinode); | 281 | vecp->i_len = sizeof(struct xfs_icdinode); |
281 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); | 282 | vecp->i_type = XLOG_REG_TYPE_ICORE; |
282 | vecp++; | 283 | vecp++; |
283 | nvecs++; | 284 | nvecs++; |
284 | iip->ili_format.ilf_fields |= XFS_ILOG_CORE; | 285 | iip->ili_format.ilf_fields |= XFS_ILOG_CORE; |
@@ -335,7 +336,7 @@ xfs_inode_item_format( | |||
335 | vecp->i_addr = | 336 | vecp->i_addr = |
336 | (char *)(ip->i_df.if_u1.if_extents); | 337 | (char *)(ip->i_df.if_u1.if_extents); |
337 | vecp->i_len = ip->i_df.if_bytes; | 338 | vecp->i_len = ip->i_df.if_bytes; |
338 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); | 339 | vecp->i_type = XLOG_REG_TYPE_IEXT; |
339 | } else | 340 | } else |
340 | #endif | 341 | #endif |
341 | { | 342 | { |
@@ -354,7 +355,7 @@ xfs_inode_item_format( | |||
354 | vecp->i_addr = (xfs_caddr_t)ext_buffer; | 355 | vecp->i_addr = (xfs_caddr_t)ext_buffer; |
355 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | 356 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, |
356 | XFS_DATA_FORK); | 357 | XFS_DATA_FORK); |
357 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); | 358 | vecp->i_type = XLOG_REG_TYPE_IEXT; |
358 | } | 359 | } |
359 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); | 360 | ASSERT(vecp->i_len <= ip->i_df.if_bytes); |
360 | iip->ili_format.ilf_dsize = vecp->i_len; | 361 | iip->ili_format.ilf_dsize = vecp->i_len; |
@@ -372,7 +373,7 @@ xfs_inode_item_format( | |||
372 | ASSERT(ip->i_df.if_broot != NULL); | 373 | ASSERT(ip->i_df.if_broot != NULL); |
373 | vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; | 374 | vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; |
374 | vecp->i_len = ip->i_df.if_broot_bytes; | 375 | vecp->i_len = ip->i_df.if_broot_bytes; |
375 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); | 376 | vecp->i_type = XLOG_REG_TYPE_IBROOT; |
376 | vecp++; | 377 | vecp++; |
377 | nvecs++; | 378 | nvecs++; |
378 | iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; | 379 | iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; |
@@ -398,7 +399,7 @@ xfs_inode_item_format( | |||
398 | ASSERT((ip->i_df.if_real_bytes == 0) || | 399 | ASSERT((ip->i_df.if_real_bytes == 0) || |
399 | (ip->i_df.if_real_bytes == data_bytes)); | 400 | (ip->i_df.if_real_bytes == data_bytes)); |
400 | vecp->i_len = (int)data_bytes; | 401 | vecp->i_len = (int)data_bytes; |
401 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); | 402 | vecp->i_type = XLOG_REG_TYPE_ILOCAL; |
402 | vecp++; | 403 | vecp++; |
403 | nvecs++; | 404 | nvecs++; |
404 | iip->ili_format.ilf_dsize = (unsigned)data_bytes; | 405 | iip->ili_format.ilf_dsize = (unsigned)data_bytes; |
@@ -476,7 +477,7 @@ xfs_inode_item_format( | |||
476 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, | 477 | vecp->i_len = xfs_iextents_copy(ip, ext_buffer, |
477 | XFS_ATTR_FORK); | 478 | XFS_ATTR_FORK); |
478 | #endif | 479 | #endif |
479 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); | 480 | vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; |
480 | iip->ili_format.ilf_asize = vecp->i_len; | 481 | iip->ili_format.ilf_asize = vecp->i_len; |
481 | vecp++; | 482 | vecp++; |
482 | nvecs++; | 483 | nvecs++; |
@@ -491,7 +492,7 @@ xfs_inode_item_format( | |||
491 | ASSERT(ip->i_afp->if_broot != NULL); | 492 | ASSERT(ip->i_afp->if_broot != NULL); |
492 | vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; | 493 | vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; |
493 | vecp->i_len = ip->i_afp->if_broot_bytes; | 494 | vecp->i_len = ip->i_afp->if_broot_bytes; |
494 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); | 495 | vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; |
495 | vecp++; | 496 | vecp++; |
496 | nvecs++; | 497 | nvecs++; |
497 | iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; | 498 | iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; |
@@ -515,7 +516,7 @@ xfs_inode_item_format( | |||
515 | ASSERT((ip->i_afp->if_real_bytes == 0) || | 516 | ASSERT((ip->i_afp->if_real_bytes == 0) || |
516 | (ip->i_afp->if_real_bytes == data_bytes)); | 517 | (ip->i_afp->if_real_bytes == data_bytes)); |
517 | vecp->i_len = (int)data_bytes; | 518 | vecp->i_len = (int)data_bytes; |
518 | XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); | 519 | vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL; |
519 | vecp++; | 520 | vecp++; |
520 | nvecs++; | 521 | nvecs++; |
521 | iip->ili_format.ilf_asize = (unsigned)data_bytes; | 522 | iip->ili_format.ilf_asize = (unsigned)data_bytes; |
@@ -534,23 +535,23 @@ xfs_inode_item_format( | |||
534 | 535 | ||
535 | /* | 536 | /* |
536 | * This is called to pin the inode associated with the inode log | 537 | * This is called to pin the inode associated with the inode log |
537 | * item in memory so it cannot be written out. Do this by calling | 538 | * item in memory so it cannot be written out. |
538 | * xfs_ipin() to bump the pin count in the inode while holding the | ||
539 | * inode pin lock. | ||
540 | */ | 539 | */ |
541 | STATIC void | 540 | STATIC void |
542 | xfs_inode_item_pin( | 541 | xfs_inode_item_pin( |
543 | xfs_inode_log_item_t *iip) | 542 | xfs_inode_log_item_t *iip) |
544 | { | 543 | { |
545 | ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); | 544 | ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); |
546 | xfs_ipin(iip->ili_inode); | 545 | |
546 | atomic_inc(&iip->ili_inode->i_pincount); | ||
547 | } | 547 | } |
548 | 548 | ||
549 | 549 | ||
550 | /* | 550 | /* |
551 | * This is called to unpin the inode associated with the inode log | 551 | * This is called to unpin the inode associated with the inode log |
552 | * item which was previously pinned with a call to xfs_inode_item_pin(). | 552 | * item which was previously pinned with a call to xfs_inode_item_pin(). |
553 | * Just call xfs_iunpin() on the inode to do this. | 553 | * |
554 | * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0. | ||
554 | */ | 555 | */ |
555 | /* ARGSUSED */ | 556 | /* ARGSUSED */ |
556 | STATIC void | 557 | STATIC void |
@@ -558,7 +559,11 @@ xfs_inode_item_unpin( | |||
558 | xfs_inode_log_item_t *iip, | 559 | xfs_inode_log_item_t *iip, |
559 | int stale) | 560 | int stale) |
560 | { | 561 | { |
561 | xfs_iunpin(iip->ili_inode); | 562 | struct xfs_inode *ip = iip->ili_inode; |
563 | |||
564 | ASSERT(atomic_read(&ip->i_pincount) > 0); | ||
565 | if (atomic_dec_and_test(&ip->i_pincount)) | ||
566 | wake_up(&ip->i_ipin_wait); | ||
562 | } | 567 | } |
563 | 568 | ||
564 | /* ARGSUSED */ | 569 | /* ARGSUSED */ |
@@ -567,7 +572,7 @@ xfs_inode_item_unpin_remove( | |||
567 | xfs_inode_log_item_t *iip, | 572 | xfs_inode_log_item_t *iip, |
568 | xfs_trans_t *tp) | 573 | xfs_trans_t *tp) |
569 | { | 574 | { |
570 | xfs_iunpin(iip->ili_inode); | 575 | xfs_inode_item_unpin(iip, 0); |
571 | } | 576 | } |
572 | 577 | ||
573 | /* | 578 | /* |
@@ -601,33 +606,20 @@ xfs_inode_item_trylock( | |||
601 | 606 | ||
602 | if (!xfs_iflock_nowait(ip)) { | 607 | if (!xfs_iflock_nowait(ip)) { |
603 | /* | 608 | /* |
604 | * If someone else isn't already trying to push the inode | 609 | * inode has already been flushed to the backing buffer, |
605 | * buffer, we get to do it. | 610 | * leave it locked in shared mode, pushbuf routine will |
611 | * unlock it. | ||
606 | */ | 612 | */ |
607 | if (iip->ili_pushbuf_flag == 0) { | 613 | return XFS_ITEM_PUSHBUF; |
608 | iip->ili_pushbuf_flag = 1; | ||
609 | #ifdef DEBUG | ||
610 | iip->ili_push_owner = current_pid(); | ||
611 | #endif | ||
612 | /* | ||
613 | * Inode is left locked in shared mode. | ||
614 | * Pushbuf routine gets to unlock it. | ||
615 | */ | ||
616 | return XFS_ITEM_PUSHBUF; | ||
617 | } else { | ||
618 | /* | ||
619 | * We hold the AIL lock, so we must specify the | ||
620 | * NONOTIFY flag so that we won't double trip. | ||
621 | */ | ||
622 | xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); | ||
623 | return XFS_ITEM_FLUSHING; | ||
624 | } | ||
625 | /* NOTREACHED */ | ||
626 | } | 614 | } |
627 | 615 | ||
628 | /* Stale items should force out the iclog */ | 616 | /* Stale items should force out the iclog */ |
629 | if (ip->i_flags & XFS_ISTALE) { | 617 | if (ip->i_flags & XFS_ISTALE) { |
630 | xfs_ifunlock(ip); | 618 | xfs_ifunlock(ip); |
619 | /* | ||
620 | * we hold the AIL lock - notify the unlock routine of this | ||
621 | * so it doesn't try to get the lock again. | ||
622 | */ | ||
631 | xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); | 623 | xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); |
632 | return XFS_ITEM_PINNED; | 624 | return XFS_ITEM_PINNED; |
633 | } | 625 | } |
@@ -745,11 +737,8 @@ xfs_inode_item_committed( | |||
745 | * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK | 737 | * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK |
746 | * failed to get the inode flush lock but did get the inode locked SHARED. | 738 | * failed to get the inode flush lock but did get the inode locked SHARED. |
747 | * Here we're trying to see if the inode buffer is incore, and if so whether it's | 739 | * Here we're trying to see if the inode buffer is incore, and if so whether it's |
748 | * marked delayed write. If that's the case, we'll initiate a bawrite on that | 740 | * marked delayed write. If that's the case, we'll promote it and that will |
749 | * buffer to expedite the process. | 741 | * allow the caller to write the buffer by triggering the xfsbufd to run. |
750 | * | ||
751 | * We aren't holding the AIL lock (or the flush lock) when this gets called, | ||
752 | * so it is inherently race-y. | ||
753 | */ | 742 | */ |
754 | STATIC void | 743 | STATIC void |
755 | xfs_inode_item_pushbuf( | 744 | xfs_inode_item_pushbuf( |
@@ -758,80 +747,30 @@ xfs_inode_item_pushbuf( | |||
758 | xfs_inode_t *ip; | 747 | xfs_inode_t *ip; |
759 | xfs_mount_t *mp; | 748 | xfs_mount_t *mp; |
760 | xfs_buf_t *bp; | 749 | xfs_buf_t *bp; |
761 | uint dopush; | ||
762 | 750 | ||
763 | ip = iip->ili_inode; | 751 | ip = iip->ili_inode; |
764 | |||
765 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | 752 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); |
766 | 753 | ||
767 | /* | 754 | /* |
768 | * The ili_pushbuf_flag keeps others from | ||
769 | * trying to duplicate our effort. | ||
770 | */ | ||
771 | ASSERT(iip->ili_pushbuf_flag != 0); | ||
772 | ASSERT(iip->ili_push_owner == current_pid()); | ||
773 | |||
774 | /* | ||
775 | * If a flush is not in progress anymore, chances are that the | 755 | * If a flush is not in progress anymore, chances are that the |
776 | * inode was taken off the AIL. So, just get out. | 756 | * inode was taken off the AIL. So, just get out. |
777 | */ | 757 | */ |
778 | if (completion_done(&ip->i_flush) || | 758 | if (completion_done(&ip->i_flush) || |
779 | ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { | 759 | ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { |
780 | iip->ili_pushbuf_flag = 0; | ||
781 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 760 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
782 | return; | 761 | return; |
783 | } | 762 | } |
784 | 763 | ||
785 | mp = ip->i_mount; | 764 | mp = ip->i_mount; |
786 | bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, | 765 | bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, |
787 | iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK); | 766 | iip->ili_format.ilf_len, XBF_TRYLOCK); |
788 | 767 | ||
789 | if (bp != NULL) { | ||
790 | if (XFS_BUF_ISDELAYWRITE(bp)) { | ||
791 | /* | ||
792 | * We were racing with iflush because we don't hold | ||
793 | * the AIL lock or the flush lock. However, at this point, | ||
794 | * we have the buffer, and we know that it's dirty. | ||
795 | * So, it's possible that iflush raced with us, and | ||
796 | * this item is already taken off the AIL. | ||
797 | * If not, we can flush it async. | ||
798 | */ | ||
799 | dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && | ||
800 | !completion_done(&ip->i_flush)); | ||
801 | iip->ili_pushbuf_flag = 0; | ||
802 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
803 | xfs_buftrace("INODE ITEM PUSH", bp); | ||
804 | if (XFS_BUF_ISPINNED(bp)) { | ||
805 | xfs_log_force(mp, (xfs_lsn_t)0, | ||
806 | XFS_LOG_FORCE); | ||
807 | } | ||
808 | if (dopush) { | ||
809 | int error; | ||
810 | error = xfs_bawrite(mp, bp); | ||
811 | if (error) | ||
812 | xfs_fs_cmn_err(CE_WARN, mp, | ||
813 | "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p", | ||
814 | error, iip, bp); | ||
815 | } else { | ||
816 | xfs_buf_relse(bp); | ||
817 | } | ||
818 | } else { | ||
819 | iip->ili_pushbuf_flag = 0; | ||
820 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
821 | xfs_buf_relse(bp); | ||
822 | } | ||
823 | return; | ||
824 | } | ||
825 | /* | ||
826 | * We have to be careful about resetting pushbuf flag too early (above). | ||
827 | * Even though in theory we can do it as soon as we have the buflock, | ||
828 | * we don't want others to be doing work needlessly. They'll come to | ||
829 | * this function thinking that pushing the buffer is their | ||
830 | * responsibility only to find that the buffer is still locked by | ||
831 | * another doing the same thing | ||
832 | */ | ||
833 | iip->ili_pushbuf_flag = 0; | ||
834 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 768 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
769 | if (!bp) | ||
770 | return; | ||
771 | if (XFS_BUF_ISDELAYWRITE(bp)) | ||
772 | xfs_buf_delwri_promote(bp); | ||
773 | xfs_buf_relse(bp); | ||
835 | return; | 774 | return; |
836 | } | 775 | } |
837 | 776 | ||
@@ -864,10 +803,14 @@ xfs_inode_item_push( | |||
864 | iip->ili_format.ilf_fields != 0); | 803 | iip->ili_format.ilf_fields != 0); |
865 | 804 | ||
866 | /* | 805 | /* |
867 | * Write out the inode. The completion routine ('iflush_done') will | 806 | * Push the inode to its backing buffer. This will not remove the |
868 | * pull it from the AIL, mark it clean, unlock the flush lock. | 807 | * inode from the AIL - a further push will be required to trigger a |
808 | * buffer push. However, this allows all the dirty inodes to be pushed | ||
809 | * to the buffer before it is pushed to disk. The buffer IO completion | ||
810 | * will pull the inode from the AIL, mark it clean and unlock the flush | ||
811 | * lock. | ||
869 | */ | 812 | */ |
870 | (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC); | 813 | (void) xfs_iflush(ip, 0); |
871 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 814 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
872 | 815 | ||
873 | return; | 816 | return; |
@@ -931,7 +874,6 @@ xfs_inode_item_init( | |||
931 | /* | 874 | /* |
932 | We have zeroed memory. No need ... | 875 | We have zeroed memory. No need ... |
933 | iip->ili_extents_buf = NULL; | 876 | iip->ili_extents_buf = NULL; |
934 | iip->ili_pushbuf_flag = 0; | ||
935 | */ | 877 | */ |
936 | 878 | ||
937 | iip->ili_format.ilf_type = XFS_LI_INODE; | 879 | iip->ili_format.ilf_type = XFS_LI_INODE; |