diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/xfs/xfs_iget.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'fs/xfs/xfs_iget.c')
-rw-r--r-- | fs/xfs/xfs_iget.c | 105 |
1 files changed, 82 insertions, 23 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b1ecc6f97ade..3631783b2b53 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -43,6 +43,17 @@ | |||
43 | 43 | ||
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Define xfs inode iolock lockdep classes. We need to ensure that all active | ||
47 | * inodes are considered the same for lockdep purposes, including inodes that | ||
48 | * are recycled through the XFS_IRECLAIMABLE state. This is the only way to | ||
49 | * guarantee the locks are considered the same when there are multiple lock | ||
50 | * initialisation sites. Also, define a reclaimable inode class so it is | ||
51 | * obvious in lockdep reports which class the report is against. | ||
52 | */ | ||
53 | static struct lock_class_key xfs_iolock_active; | ||
54 | struct lock_class_key xfs_iolock_reclaimable; | ||
55 | |||
56 | /* | ||
46 | * Allocate and initialise an xfs_inode. | 57 | * Allocate and initialise an xfs_inode. |
47 | */ | 58 | */ |
48 | STATIC struct xfs_inode * | 59 | STATIC struct xfs_inode * |
@@ -69,8 +80,11 @@ xfs_inode_alloc( | |||
69 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 80 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
70 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 81 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
71 | ASSERT(completion_done(&ip->i_flush)); | 82 | ASSERT(completion_done(&ip->i_flush)); |
83 | ASSERT(ip->i_ino == 0); | ||
72 | 84 | ||
73 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 85 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
86 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
87 | &xfs_iolock_active, "xfs_iolock_active"); | ||
74 | 88 | ||
75 | /* initialise the xfs inode */ | 89 | /* initialise the xfs inode */ |
76 | ip->i_ino = ino; | 90 | ip->i_ino = ino; |
@@ -85,12 +99,20 @@ xfs_inode_alloc( | |||
85 | ip->i_size = 0; | 99 | ip->i_size = 0; |
86 | ip->i_new_size = 0; | 100 | ip->i_new_size = 0; |
87 | 101 | ||
88 | /* prevent anyone from using this yet */ | ||
89 | VFS_I(ip)->i_state = I_NEW; | ||
90 | |||
91 | return ip; | 102 | return ip; |
92 | } | 103 | } |
93 | 104 | ||
105 | STATIC void | ||
106 | xfs_inode_free_callback( | ||
107 | struct rcu_head *head) | ||
108 | { | ||
109 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
110 | struct xfs_inode *ip = XFS_I(inode); | ||
111 | |||
112 | INIT_LIST_HEAD(&inode->i_dentry); | ||
113 | kmem_zone_free(xfs_inode_zone, ip); | ||
114 | } | ||
115 | |||
94 | void | 116 | void |
95 | xfs_inode_free( | 117 | xfs_inode_free( |
96 | struct xfs_inode *ip) | 118 | struct xfs_inode *ip) |
@@ -134,7 +156,18 @@ xfs_inode_free( | |||
134 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 156 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
135 | ASSERT(completion_done(&ip->i_flush)); | 157 | ASSERT(completion_done(&ip->i_flush)); |
136 | 158 | ||
137 | kmem_zone_free(xfs_inode_zone, ip); | 159 | /* |
160 | * Because we use RCU freeing we need to ensure the inode always | ||
161 | * appears to be reclaimed with an invalid inode number when in the | ||
162 | * free state. The ip->i_flags_lock provides the barrier against lookup | ||
163 | * races. | ||
164 | */ | ||
165 | spin_lock(&ip->i_flags_lock); | ||
166 | ip->i_flags = XFS_IRECLAIM; | ||
167 | ip->i_ino = 0; | ||
168 | spin_unlock(&ip->i_flags_lock); | ||
169 | |||
170 | call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); | ||
138 | } | 171 | } |
139 | 172 | ||
140 | /* | 173 | /* |
@@ -144,14 +177,29 @@ static int | |||
144 | xfs_iget_cache_hit( | 177 | xfs_iget_cache_hit( |
145 | struct xfs_perag *pag, | 178 | struct xfs_perag *pag, |
146 | struct xfs_inode *ip, | 179 | struct xfs_inode *ip, |
180 | xfs_ino_t ino, | ||
147 | int flags, | 181 | int flags, |
148 | int lock_flags) __releases(pag->pag_ici_lock) | 182 | int lock_flags) __releases(RCU) |
149 | { | 183 | { |
150 | struct inode *inode = VFS_I(ip); | 184 | struct inode *inode = VFS_I(ip); |
151 | struct xfs_mount *mp = ip->i_mount; | 185 | struct xfs_mount *mp = ip->i_mount; |
152 | int error; | 186 | int error; |
153 | 187 | ||
188 | /* | ||
189 | * check for re-use of an inode within an RCU grace period due to the | ||
190 | * radix tree nodes not being updated yet. We monitor for this by | ||
191 | * setting the inode number to zero before freeing the inode structure. | ||
192 | * If the inode has been reallocated and set up, then the inode number | ||
193 | * will not match, so check for that, too. | ||
194 | */ | ||
154 | spin_lock(&ip->i_flags_lock); | 195 | spin_lock(&ip->i_flags_lock); |
196 | if (ip->i_ino != ino) { | ||
197 | trace_xfs_iget_skip(ip); | ||
198 | XFS_STATS_INC(xs_ig_frecycle); | ||
199 | error = EAGAIN; | ||
200 | goto out_error; | ||
201 | } | ||
202 | |||
155 | 203 | ||
156 | /* | 204 | /* |
157 | * If we are racing with another cache hit that is currently | 205 | * If we are racing with another cache hit that is currently |
@@ -194,7 +242,7 @@ xfs_iget_cache_hit( | |||
194 | ip->i_flags |= XFS_IRECLAIM; | 242 | ip->i_flags |= XFS_IRECLAIM; |
195 | 243 | ||
196 | spin_unlock(&ip->i_flags_lock); | 244 | spin_unlock(&ip->i_flags_lock); |
197 | read_unlock(&pag->pag_ici_lock); | 245 | rcu_read_unlock(); |
198 | 246 | ||
199 | error = -inode_init_always(mp->m_super, inode); | 247 | error = -inode_init_always(mp->m_super, inode); |
200 | if (error) { | 248 | if (error) { |
@@ -202,24 +250,35 @@ xfs_iget_cache_hit( | |||
202 | * Re-initializing the inode failed, and we are in deep | 250 | * Re-initializing the inode failed, and we are in deep |
203 | * trouble. Try to re-add it to the reclaim list. | 251 | * trouble. Try to re-add it to the reclaim list. |
204 | */ | 252 | */ |
205 | read_lock(&pag->pag_ici_lock); | 253 | rcu_read_lock(); |
206 | spin_lock(&ip->i_flags_lock); | 254 | spin_lock(&ip->i_flags_lock); |
207 | 255 | ||
208 | ip->i_flags &= ~XFS_INEW; | 256 | ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); |
209 | ip->i_flags |= XFS_IRECLAIMABLE; | 257 | ASSERT(ip->i_flags & XFS_IRECLAIMABLE); |
210 | __xfs_inode_set_reclaim_tag(pag, ip); | ||
211 | trace_xfs_iget_reclaim_fail(ip); | 258 | trace_xfs_iget_reclaim_fail(ip); |
212 | goto out_error; | 259 | goto out_error; |
213 | } | 260 | } |
214 | 261 | ||
215 | write_lock(&pag->pag_ici_lock); | 262 | spin_lock(&pag->pag_ici_lock); |
216 | spin_lock(&ip->i_flags_lock); | 263 | spin_lock(&ip->i_flags_lock); |
217 | ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); | 264 | |
265 | /* | ||
266 | * Clear the per-lifetime state in the inode as we are now | ||
267 | * effectively a new inode and need to return to the initial | ||
268 | * state before reuse occurs. | ||
269 | */ | ||
270 | ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; | ||
218 | ip->i_flags |= XFS_INEW; | 271 | ip->i_flags |= XFS_INEW; |
219 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 272 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
220 | inode->i_state = I_NEW; | 273 | inode->i_state = I_NEW; |
274 | |||
275 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | ||
276 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | ||
277 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
278 | &xfs_iolock_active, "xfs_iolock_active"); | ||
279 | |||
221 | spin_unlock(&ip->i_flags_lock); | 280 | spin_unlock(&ip->i_flags_lock); |
222 | write_unlock(&pag->pag_ici_lock); | 281 | spin_unlock(&pag->pag_ici_lock); |
223 | } else { | 282 | } else { |
224 | /* If the VFS inode is being torn down, pause and try again. */ | 283 | /* If the VFS inode is being torn down, pause and try again. */ |
225 | if (!igrab(inode)) { | 284 | if (!igrab(inode)) { |
@@ -230,7 +289,7 @@ xfs_iget_cache_hit( | |||
230 | 289 | ||
231 | /* We've got a live one. */ | 290 | /* We've got a live one. */ |
232 | spin_unlock(&ip->i_flags_lock); | 291 | spin_unlock(&ip->i_flags_lock); |
233 | read_unlock(&pag->pag_ici_lock); | 292 | rcu_read_unlock(); |
234 | trace_xfs_iget_hit(ip); | 293 | trace_xfs_iget_hit(ip); |
235 | } | 294 | } |
236 | 295 | ||
@@ -244,7 +303,7 @@ xfs_iget_cache_hit( | |||
244 | 303 | ||
245 | out_error: | 304 | out_error: |
246 | spin_unlock(&ip->i_flags_lock); | 305 | spin_unlock(&ip->i_flags_lock); |
247 | read_unlock(&pag->pag_ici_lock); | 306 | rcu_read_unlock(); |
248 | return error; | 307 | return error; |
249 | } | 308 | } |
250 | 309 | ||
@@ -297,7 +356,7 @@ xfs_iget_cache_miss( | |||
297 | BUG(); | 356 | BUG(); |
298 | } | 357 | } |
299 | 358 | ||
300 | write_lock(&pag->pag_ici_lock); | 359 | spin_lock(&pag->pag_ici_lock); |
301 | 360 | ||
302 | /* insert the new inode */ | 361 | /* insert the new inode */ |
303 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); | 362 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); |
@@ -312,14 +371,14 @@ xfs_iget_cache_miss( | |||
312 | ip->i_udquot = ip->i_gdquot = NULL; | 371 | ip->i_udquot = ip->i_gdquot = NULL; |
313 | xfs_iflags_set(ip, XFS_INEW); | 372 | xfs_iflags_set(ip, XFS_INEW); |
314 | 373 | ||
315 | write_unlock(&pag->pag_ici_lock); | 374 | spin_unlock(&pag->pag_ici_lock); |
316 | radix_tree_preload_end(); | 375 | radix_tree_preload_end(); |
317 | 376 | ||
318 | *ipp = ip; | 377 | *ipp = ip; |
319 | return 0; | 378 | return 0; |
320 | 379 | ||
321 | out_preload_end: | 380 | out_preload_end: |
322 | write_unlock(&pag->pag_ici_lock); | 381 | spin_unlock(&pag->pag_ici_lock); |
323 | radix_tree_preload_end(); | 382 | radix_tree_preload_end(); |
324 | if (lock_flags) | 383 | if (lock_flags) |
325 | xfs_iunlock(ip, lock_flags); | 384 | xfs_iunlock(ip, lock_flags); |
@@ -365,8 +424,8 @@ xfs_iget( | |||
365 | xfs_perag_t *pag; | 424 | xfs_perag_t *pag; |
366 | xfs_agino_t agino; | 425 | xfs_agino_t agino; |
367 | 426 | ||
368 | /* the radix tree exists only in inode capable AGs */ | 427 | /* reject inode numbers outside existing AGs */ |
369 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) | 428 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
370 | return EINVAL; | 429 | return EINVAL; |
371 | 430 | ||
372 | /* get the perag structure and ensure that it's inode capable */ | 431 | /* get the perag structure and ensure that it's inode capable */ |
@@ -375,15 +434,15 @@ xfs_iget( | |||
375 | 434 | ||
376 | again: | 435 | again: |
377 | error = 0; | 436 | error = 0; |
378 | read_lock(&pag->pag_ici_lock); | 437 | rcu_read_lock(); |
379 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | 438 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); |
380 | 439 | ||
381 | if (ip) { | 440 | if (ip) { |
382 | error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); | 441 | error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); |
383 | if (error) | 442 | if (error) |
384 | goto out_error_or_again; | 443 | goto out_error_or_again; |
385 | } else { | 444 | } else { |
386 | read_unlock(&pag->pag_ici_lock); | 445 | rcu_read_unlock(); |
387 | XFS_STATS_INC(xs_ig_missed); | 446 | XFS_STATS_INC(xs_ig_missed); |
388 | 447 | ||
389 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, | 448 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, |