diff options
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 84 |
1 files changed, 66 insertions, 18 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index afb0d7cfad1c..fd38682da851 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab( | |||
53 | { | 53 | { |
54 | struct inode *inode = VFS_I(ip); | 54 | struct inode *inode = VFS_I(ip); |
55 | 55 | ||
56 | ASSERT(rcu_read_lock_held()); | ||
57 | |||
58 | /* | ||
59 | * check for stale RCU freed inode | ||
60 | * | ||
61 | * If the inode has been reallocated, it doesn't matter if it's not in | ||
62 | * the AG we are walking - we are walking for writeback, so if it | ||
63 | * passes all the "valid inode" checks and is dirty, then we'll write | ||
64 | * it back anyway. If it has been reallocated and still being | ||
65 | * initialised, the XFS_INEW check below will catch it. | ||
66 | */ | ||
67 | spin_lock(&ip->i_flags_lock); | ||
68 | if (!ip->i_ino) | ||
69 | goto out_unlock_noent; | ||
70 | |||
71 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
72 | if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
73 | goto out_unlock_noent; | ||
74 | spin_unlock(&ip->i_flags_lock); | ||
75 | |||
56 | /* nothing to sync during shutdown */ | 76 | /* nothing to sync during shutdown */ |
57 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 77 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
58 | return EFSCORRUPTED; | 78 | return EFSCORRUPTED; |
59 | 79 | ||
60 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
61 | if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
62 | return ENOENT; | ||
63 | |||
64 | /* If we can't grab the inode, it must on it's way to reclaim. */ | 80 | /* If we can't grab the inode, it must on it's way to reclaim. */ |
65 | if (!igrab(inode)) | 81 | if (!igrab(inode)) |
66 | return ENOENT; | 82 | return ENOENT; |
@@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab( | |||
72 | 88 | ||
73 | /* inode is valid */ | 89 | /* inode is valid */ |
74 | return 0; | 90 | return 0; |
91 | |||
92 | out_unlock_noent: | ||
93 | spin_unlock(&ip->i_flags_lock); | ||
94 | return ENOENT; | ||
75 | } | 95 | } |
76 | 96 | ||
77 | STATIC int | 97 | STATIC int |
@@ -98,12 +118,12 @@ restart: | |||
98 | int error = 0; | 118 | int error = 0; |
99 | int i; | 119 | int i; |
100 | 120 | ||
101 | read_lock(&pag->pag_ici_lock); | 121 | rcu_read_lock(); |
102 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | 122 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, |
103 | (void **)batch, first_index, | 123 | (void **)batch, first_index, |
104 | XFS_LOOKUP_BATCH); | 124 | XFS_LOOKUP_BATCH); |
105 | if (!nr_found) { | 125 | if (!nr_found) { |
106 | read_unlock(&pag->pag_ici_lock); | 126 | rcu_read_unlock(); |
107 | break; | 127 | break; |
108 | } | 128 | } |
109 | 129 | ||
@@ -118,18 +138,26 @@ restart: | |||
118 | batch[i] = NULL; | 138 | batch[i] = NULL; |
119 | 139 | ||
120 | /* | 140 | /* |
121 | * Update the index for the next lookup. Catch overflows | 141 | * Update the index for the next lookup. Catch |
122 | * into the next AG range which can occur if we have inodes | 142 | * overflows into the next AG range which can occur if |
123 | * in the last block of the AG and we are currently | 143 | * we have inodes in the last block of the AG and we |
124 | * pointing to the last inode. | 144 | * are currently pointing to the last inode. |
145 | * | ||
146 | * Because we may see inodes that are from the wrong AG | ||
147 | * due to RCU freeing and reallocation, only update the | ||
148 | * index if it lies in this AG. It was a race that lead | ||
149 | * us to see this inode, so another lookup from the | ||
150 | * same index will not find it again. | ||
125 | */ | 151 | */ |
152 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) | ||
153 | continue; | ||
126 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 154 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
127 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 155 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
128 | done = 1; | 156 | done = 1; |
129 | } | 157 | } |
130 | 158 | ||
131 | /* unlock now we've grabbed the inodes. */ | 159 | /* unlock now we've grabbed the inodes. */ |
132 | read_unlock(&pag->pag_ici_lock); | 160 | rcu_read_unlock(); |
133 | 161 | ||
134 | for (i = 0; i < nr_found; i++) { | 162 | for (i = 0; i < nr_found; i++) { |
135 | if (!batch[i]) | 163 | if (!batch[i]) |
@@ -639,9 +667,14 @@ xfs_reclaim_inode_grab( | |||
639 | struct xfs_inode *ip, | 667 | struct xfs_inode *ip, |
640 | int flags) | 668 | int flags) |
641 | { | 669 | { |
670 | ASSERT(rcu_read_lock_held()); | ||
671 | |||
672 | /* quick check for stale RCU freed inode */ | ||
673 | if (!ip->i_ino) | ||
674 | return 1; | ||
642 | 675 | ||
643 | /* | 676 | /* |
644 | * do some unlocked checks first to avoid unnecceary lock traffic. | 677 | * do some unlocked checks first to avoid unnecessary lock traffic. |
645 | * The first is a flush lock check, the second is a already in reclaim | 678 | * The first is a flush lock check, the second is a already in reclaim |
646 | * check. Only do these checks if we are not going to block on locks. | 679 | * check. Only do these checks if we are not going to block on locks. |
647 | */ | 680 | */ |
@@ -654,11 +687,16 @@ xfs_reclaim_inode_grab( | |||
654 | * The radix tree lock here protects a thread in xfs_iget from racing | 687 | * The radix tree lock here protects a thread in xfs_iget from racing |
655 | * with us starting reclaim on the inode. Once we have the | 688 | * with us starting reclaim on the inode. Once we have the |
656 | * XFS_IRECLAIM flag set it will not touch us. | 689 | * XFS_IRECLAIM flag set it will not touch us. |
690 | * | ||
691 | * Due to RCU lookup, we may find inodes that have been freed and only | ||
692 | * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that | ||
693 | * aren't candidates for reclaim at all, so we must check the | ||
694 | * XFS_IRECLAIMABLE is set first before proceeding to reclaim. | ||
657 | */ | 695 | */ |
658 | spin_lock(&ip->i_flags_lock); | 696 | spin_lock(&ip->i_flags_lock); |
659 | ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | 697 | if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || |
660 | if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { | 698 | __xfs_iflags_test(ip, XFS_IRECLAIM)) { |
661 | /* ignore as it is already under reclaim */ | 699 | /* not a reclaim candidate. */ |
662 | spin_unlock(&ip->i_flags_lock); | 700 | spin_unlock(&ip->i_flags_lock); |
663 | return 1; | 701 | return 1; |
664 | } | 702 | } |
@@ -864,14 +902,14 @@ restart: | |||
864 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | 902 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; |
865 | int i; | 903 | int i; |
866 | 904 | ||
867 | write_lock(&pag->pag_ici_lock); | 905 | rcu_read_lock(); |
868 | nr_found = radix_tree_gang_lookup_tag( | 906 | nr_found = radix_tree_gang_lookup_tag( |
869 | &pag->pag_ici_root, | 907 | &pag->pag_ici_root, |
870 | (void **)batch, first_index, | 908 | (void **)batch, first_index, |
871 | XFS_LOOKUP_BATCH, | 909 | XFS_LOOKUP_BATCH, |
872 | XFS_ICI_RECLAIM_TAG); | 910 | XFS_ICI_RECLAIM_TAG); |
873 | if (!nr_found) { | 911 | if (!nr_found) { |
874 | write_unlock(&pag->pag_ici_lock); | 912 | rcu_read_unlock(); |
875 | break; | 913 | break; |
876 | } | 914 | } |
877 | 915 | ||
@@ -891,14 +929,24 @@ restart: | |||
891 | * occur if we have inodes in the last block of | 929 | * occur if we have inodes in the last block of |
892 | * the AG and we are currently pointing to the | 930 | * the AG and we are currently pointing to the |
893 | * last inode. | 931 | * last inode. |
932 | * | ||
933 | * Because we may see inodes that are from the | ||
934 | * wrong AG due to RCU freeing and | ||
935 | * reallocation, only update the index if it | ||
936 | * lies in this AG. It was a race that lead us | ||
937 | * to see this inode, so another lookup from | ||
938 | * the same index will not find it again. | ||
894 | */ | 939 | */ |
940 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != | ||
941 | pag->pag_agno) | ||
942 | continue; | ||
895 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 943 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
896 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 944 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
897 | done = 1; | 945 | done = 1; |
898 | } | 946 | } |
899 | 947 | ||
900 | /* unlock now we've grabbed the inodes. */ | 948 | /* unlock now we've grabbed the inodes. */ |
901 | write_unlock(&pag->pag_ici_lock); | 949 | rcu_read_unlock(); |
902 | 950 | ||
903 | for (i = 0; i < nr_found; i++) { | 951 | for (i = 0; i < nr_found; i++) { |
904 | if (!batch[i]) | 952 | if (!batch[i]) |