aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/linux-2.6/xfs_sync.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_sync.c')
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c92
1 files changed, 70 insertions, 22 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index afb0d7cfad1c..a02480de9759 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab(
53{ 53{
54 struct inode *inode = VFS_I(ip); 54 struct inode *inode = VFS_I(ip);
55 55
56 ASSERT(rcu_read_lock_held());
57
58 /*
59 * check for stale RCU freed inode
60 *
61 * If the inode has been reallocated, it doesn't matter if it's not in
62 * the AG we are walking - we are walking for writeback, so if it
63 * passes all the "valid inode" checks and is dirty, then we'll write
64 * it back anyway. If it has been reallocated and still being
65 * initialised, the XFS_INEW check below will catch it.
66 */
67 spin_lock(&ip->i_flags_lock);
68 if (!ip->i_ino)
69 goto out_unlock_noent;
70
71 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
72 if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
73 goto out_unlock_noent;
74 spin_unlock(&ip->i_flags_lock);
75
56 /* nothing to sync during shutdown */ 76 /* nothing to sync during shutdown */
57 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 77 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
58 return EFSCORRUPTED; 78 return EFSCORRUPTED;
59 79
60 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
61 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
62 return ENOENT;
63
64 /* If we can't grab the inode, it must on it's way to reclaim. */ 80 /* If we can't grab the inode, it must on it's way to reclaim. */
65 if (!igrab(inode)) 81 if (!igrab(inode))
66 return ENOENT; 82 return ENOENT;
@@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab(
72 88
73 /* inode is valid */ 89 /* inode is valid */
74 return 0; 90 return 0;
91
92out_unlock_noent:
93 spin_unlock(&ip->i_flags_lock);
94 return ENOENT;
75} 95}
76 96
77STATIC int 97STATIC int
@@ -98,12 +118,12 @@ restart:
98 int error = 0; 118 int error = 0;
99 int i; 119 int i;
100 120
101 read_lock(&pag->pag_ici_lock); 121 rcu_read_lock();
102 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 122 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
103 (void **)batch, first_index, 123 (void **)batch, first_index,
104 XFS_LOOKUP_BATCH); 124 XFS_LOOKUP_BATCH);
105 if (!nr_found) { 125 if (!nr_found) {
106 read_unlock(&pag->pag_ici_lock); 126 rcu_read_unlock();
107 break; 127 break;
108 } 128 }
109 129
@@ -118,18 +138,26 @@ restart:
118 batch[i] = NULL; 138 batch[i] = NULL;
119 139
120 /* 140 /*
121 * Update the index for the next lookup. Catch overflows 141 * Update the index for the next lookup. Catch
122 * into the next AG range which can occur if we have inodes 142 * overflows into the next AG range which can occur if
123 * in the last block of the AG and we are currently 143 * we have inodes in the last block of the AG and we
124 * pointing to the last inode. 144 * are currently pointing to the last inode.
145 *
146 * Because we may see inodes that are from the wrong AG
147 * due to RCU freeing and reallocation, only update the
148 * index if it lies in this AG. It was a race that lead
149 * us to see this inode, so another lookup from the
150 * same index will not find it again.
125 */ 151 */
152 if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
153 continue;
126 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 154 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
127 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 155 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
128 done = 1; 156 done = 1;
129 } 157 }
130 158
131 /* unlock now we've grabbed the inodes. */ 159 /* unlock now we've grabbed the inodes. */
132 read_unlock(&pag->pag_ici_lock); 160 rcu_read_unlock();
133 161
134 for (i = 0; i < nr_found; i++) { 162 for (i = 0; i < nr_found; i++) {
135 if (!batch[i]) 163 if (!batch[i])
@@ -592,12 +620,12 @@ xfs_inode_set_reclaim_tag(
592 struct xfs_perag *pag; 620 struct xfs_perag *pag;
593 621
594 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 622 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
595 write_lock(&pag->pag_ici_lock); 623 spin_lock(&pag->pag_ici_lock);
596 spin_lock(&ip->i_flags_lock); 624 spin_lock(&ip->i_flags_lock);
597 __xfs_inode_set_reclaim_tag(pag, ip); 625 __xfs_inode_set_reclaim_tag(pag, ip);
598 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 626 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
599 spin_unlock(&ip->i_flags_lock); 627 spin_unlock(&ip->i_flags_lock);
600 write_unlock(&pag->pag_ici_lock); 628 spin_unlock(&pag->pag_ici_lock);
601 xfs_perag_put(pag); 629 xfs_perag_put(pag);
602} 630}
603 631
@@ -639,9 +667,14 @@ xfs_reclaim_inode_grab(
639 struct xfs_inode *ip, 667 struct xfs_inode *ip,
640 int flags) 668 int flags)
641{ 669{
670 ASSERT(rcu_read_lock_held());
671
672 /* quick check for stale RCU freed inode */
673 if (!ip->i_ino)
674 return 1;
642 675
643 /* 676 /*
644 * do some unlocked checks first to avoid unnecceary lock traffic. 677 * do some unlocked checks first to avoid unnecessary lock traffic.
645 * The first is a flush lock check, the second is a already in reclaim 678 * The first is a flush lock check, the second is a already in reclaim
646 * check. Only do these checks if we are not going to block on locks. 679 * check. Only do these checks if we are not going to block on locks.
647 */ 680 */
@@ -654,11 +687,16 @@ xfs_reclaim_inode_grab(
654 * The radix tree lock here protects a thread in xfs_iget from racing 687 * The radix tree lock here protects a thread in xfs_iget from racing
655 * with us starting reclaim on the inode. Once we have the 688 * with us starting reclaim on the inode. Once we have the
656 * XFS_IRECLAIM flag set it will not touch us. 689 * XFS_IRECLAIM flag set it will not touch us.
690 *
691 * Due to RCU lookup, we may find inodes that have been freed and only
692 * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that
693 * aren't candidates for reclaim at all, so we must check the
694 * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
657 */ 695 */
658 spin_lock(&ip->i_flags_lock); 696 spin_lock(&ip->i_flags_lock);
659 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); 697 if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
660 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { 698 __xfs_iflags_test(ip, XFS_IRECLAIM)) {
661 /* ignore as it is already under reclaim */ 699 /* not a reclaim candidate. */
662 spin_unlock(&ip->i_flags_lock); 700 spin_unlock(&ip->i_flags_lock);
663 return 1; 701 return 1;
664 } 702 }
@@ -795,12 +833,12 @@ reclaim:
795 * added to the tree assert that it's been there before to catch 833 * added to the tree assert that it's been there before to catch
796 * problems with the inode life time early on. 834 * problems with the inode life time early on.
797 */ 835 */
798 write_lock(&pag->pag_ici_lock); 836 spin_lock(&pag->pag_ici_lock);
799 if (!radix_tree_delete(&pag->pag_ici_root, 837 if (!radix_tree_delete(&pag->pag_ici_root,
800 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) 838 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
801 ASSERT(0); 839 ASSERT(0);
802 __xfs_inode_clear_reclaim(pag, ip); 840 __xfs_inode_clear_reclaim(pag, ip);
803 write_unlock(&pag->pag_ici_lock); 841 spin_unlock(&pag->pag_ici_lock);
804 842
805 /* 843 /*
806 * Here we do an (almost) spurious inode lock in order to coordinate 844 * Here we do an (almost) spurious inode lock in order to coordinate
@@ -864,14 +902,14 @@ restart:
864 struct xfs_inode *batch[XFS_LOOKUP_BATCH]; 902 struct xfs_inode *batch[XFS_LOOKUP_BATCH];
865 int i; 903 int i;
866 904
867 write_lock(&pag->pag_ici_lock); 905 rcu_read_lock();
868 nr_found = radix_tree_gang_lookup_tag( 906 nr_found = radix_tree_gang_lookup_tag(
869 &pag->pag_ici_root, 907 &pag->pag_ici_root,
870 (void **)batch, first_index, 908 (void **)batch, first_index,
871 XFS_LOOKUP_BATCH, 909 XFS_LOOKUP_BATCH,
872 XFS_ICI_RECLAIM_TAG); 910 XFS_ICI_RECLAIM_TAG);
873 if (!nr_found) { 911 if (!nr_found) {
874 write_unlock(&pag->pag_ici_lock); 912 rcu_read_unlock();
875 break; 913 break;
876 } 914 }
877 915
@@ -891,14 +929,24 @@ restart:
891 * occur if we have inodes in the last block of 929 * occur if we have inodes in the last block of
892 * the AG and we are currently pointing to the 930 * the AG and we are currently pointing to the
893 * last inode. 931 * last inode.
932 *
933 * Because we may see inodes that are from the
934 * wrong AG due to RCU freeing and
935 * reallocation, only update the index if it
936 * lies in this AG. It was a race that lead us
937 * to see this inode, so another lookup from
938 * the same index will not find it again.
894 */ 939 */
940 if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
941 pag->pag_agno)
942 continue;
895 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 943 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
896 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 944 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
897 done = 1; 945 done = 1;
898 } 946 }
899 947
900 /* unlock now we've grabbed the inodes. */ 948 /* unlock now we've grabbed the inodes. */
901 write_unlock(&pag->pag_ici_lock); 949 rcu_read_unlock();
902 950
903 for (i = 0; i < nr_found; i++) { 951 for (i = 0; i < nr_found; i++) {
904 if (!batch[i]) 952 if (!batch[i])