path: root/fs/xfs/xfs_iget.c
author    Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
commit    c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree      ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/xfs/xfs_iget.c
parent    ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent    6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
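The XFS side of this merge brings in the conversion of the per-AG inode
cache to RCU-protected lookups: readers walk pag->pag_ici_root under
rcu_read_lock() instead of read_lock(&pag->pag_ici_lock), inodes are
freed through call_rcu(), and ip->i_ino is zeroed at free time so a
lookup racing with a free/reuse cycle can detect the stale radix tree
entry. A rough sketch of the lookup pattern, condensed from xfs_iget()
and xfs_iget_cache_hit() below (error paths and flag handling trimmed,
so this is not the literal kernel code):

	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
	if (ip) {
		spin_lock(&ip->i_flags_lock);
		if (ip->i_ino != ino) {
			/* freed (i_ino == 0) or reallocated within the
			 * RCU grace period: back off and retry */
			error = EAGAIN;
		}
		spin_unlock(&ip->i_flags_lock);
	}
	rcu_read_unlock();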
Diffstat (limited to 'fs/xfs/xfs_iget.c')
-rw-r--r--  fs/xfs/xfs_iget.c | 105
1 file changed, 82 insertions(+), 23 deletions(-)
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b1ecc6f97ade..3631783b2b53 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -43,6 +43,17 @@
 
 
 /*
+ * Define xfs inode iolock lockdep classes. We need to ensure that all active
+ * inodes are considered the same for lockdep purposes, including inodes that
+ * are recycled through the XFS_IRECLAIMABLE state. This is the only way to
+ * guarantee the locks are considered the same when there are multiple lock
+ * initialisation sites. Also, define a reclaimable inode class so it is
+ * obvious in lockdep reports which class the report is against.
+ */
+static struct lock_class_key xfs_iolock_active;
+struct lock_class_key xfs_iolock_reclaimable;
+
+/*
  * Allocate and initialise an xfs_inode.
  */
 STATIC struct xfs_inode *
@@ -69,8 +80,11 @@ xfs_inode_alloc(
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
 	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(completion_done(&ip->i_flush));
+	ASSERT(ip->i_ino == 0);
 
 	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+	lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+			&xfs_iolock_active, "xfs_iolock_active");
 
 	/* initialise the xfs inode */
 	ip->i_ino = ino;
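mrlock_init() reinitialises the embedded rw_semaphore, and lockdep would
otherwise assign each initialisation site its own lock class; the
lockdep_set_class_and_name() call added here folds every active inode's
iolock into the single xfs_iolock_active class, as the comment added at
the top of the file explains. The pattern, annotated:

	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
	/* override the per-site lockdep class with one shared class */
	lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
			&xfs_iolock_active, "xfs_iolock_active");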
@@ -85,12 +99,20 @@ xfs_inode_alloc(
 	ip->i_size = 0;
 	ip->i_new_size = 0;
 
-	/* prevent anyone from using this yet */
-	VFS_I(ip)->i_state = I_NEW;
-
 	return ip;
 }
 
+STATIC void
+xfs_inode_free_callback(
+	struct rcu_head		*head)
+{
+	struct inode		*inode = container_of(head, struct inode, i_rcu);
+	struct xfs_inode	*ip = XFS_I(inode);
+
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_zone_free(xfs_inode_zone, ip);
+}
+
 void
 xfs_inode_free(
 	struct xfs_inode	*ip)
@@ -134,7 +156,18 @@ xfs_inode_free(
 	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(completion_done(&ip->i_flush));
 
-	kmem_zone_free(xfs_inode_zone, ip);
+	/*
+	 * Because we use RCU freeing we need to ensure the inode always
+	 * appears to be reclaimed with an invalid inode number when in the
+	 * free state. The ip->i_flags_lock provides the barrier against lookup
+	 * races.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ip->i_flags = XFS_IRECLAIM;
+	ip->i_ino = 0;
+	spin_unlock(&ip->i_flags_lock);
+
+	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
 }
 
 /*
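This free path is the other half of the lookup validation: the inode is
flagged and its number invalidated under i_flags_lock before the
structure is handed to RCU, so a concurrent lookup that still finds the
stale radix tree entry during the grace period sees i_ino == 0 and backs
off. The critical section from the hunk above, annotated:

	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;	/* lookups see "being reclaimed" */
	ip->i_ino = 0;			/* lookups' inode number check fails */
	spin_unlock(&ip->i_flags_lock);

	/* the structure is freed only after all current readers finish */
	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);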
@@ -144,14 +177,29 @@ static int
 xfs_iget_cache_hit(
 	struct xfs_perag	*pag,
 	struct xfs_inode	*ip,
+	xfs_ino_t		ino,
 	int			flags,
-	int			lock_flags) __releases(pag->pag_ici_lock)
+	int			lock_flags) __releases(RCU)
 {
 	struct inode		*inode = VFS_I(ip);
 	struct xfs_mount	*mp = ip->i_mount;
 	int			error;
 
+	/*
+	 * check for re-use of an inode within an RCU grace period due to the
+	 * radix tree nodes not being updated yet. We monitor for this by
+	 * setting the inode number to zero before freeing the inode structure.
+	 * If the inode has been reallocated and set up, then the inode number
+	 * will not match, so check for that, too.
+	 */
 	spin_lock(&ip->i_flags_lock);
+	if (ip->i_ino != ino) {
+		trace_xfs_iget_skip(ip);
+		XFS_STATS_INC(xs_ig_frecycle);
+		error = EAGAIN;
+		goto out_error;
+	}
+
 
 	/*
 	 * If we are racing with another cache hit that is currently
@@ -194,7 +242,7 @@ xfs_iget_cache_hit(
 		ip->i_flags |= XFS_IRECLAIM;
 
 		spin_unlock(&ip->i_flags_lock);
-		read_unlock(&pag->pag_ici_lock);
+		rcu_read_unlock();
 
 		error = -inode_init_always(mp->m_super, inode);
 		if (error) {
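The recycling path has to leave the RCU critical section before calling
inode_init_always(), which can block and therefore must not run under
rcu_read_lock(). Dropping the locks is safe because XFS_IRECLAIM has
already been set under i_flags_lock, which makes any concurrent lookup
of this inode back off; in outline:

	ip->i_flags |= XFS_IRECLAIM;	/* other lookups now return EAGAIN */
	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();

	error = -inode_init_always(mp->m_super, inode);	/* may sleep */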
@@ -202,24 +250,35 @@ xfs_iget_cache_hit(
 			 * Re-initializing the inode failed, and we are in deep
 			 * trouble. Try to re-add it to the reclaim list.
 			 */
-			read_lock(&pag->pag_ici_lock);
+			rcu_read_lock();
 			spin_lock(&ip->i_flags_lock);
 
-			ip->i_flags &= ~XFS_INEW;
-			ip->i_flags |= XFS_IRECLAIMABLE;
-			__xfs_inode_set_reclaim_tag(pag, ip);
+			ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
+			ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
 			trace_xfs_iget_reclaim_fail(ip);
 			goto out_error;
 		}
 
-		write_lock(&pag->pag_ici_lock);
+		spin_lock(&pag->pag_ici_lock);
 		spin_lock(&ip->i_flags_lock);
-		ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
+
+		/*
+		 * Clear the per-lifetime state in the inode as we are now
+		 * effectively a new inode and need to return to the initial
+		 * state before reuse occurs.
+		 */
+		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
 		ip->i_flags |= XFS_INEW;
 		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
 		inode->i_state = I_NEW;
+
+		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+		mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+		lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+				&xfs_iolock_active, "xfs_iolock_active");
+
 		spin_unlock(&ip->i_flags_lock);
-		write_unlock(&pag->pag_ici_lock);
+		spin_unlock(&pag->pag_ici_lock);
 	} else {
 		/* If the VFS inode is being torn down, pause and try again. */
 		if (!igrab(inode)) {
@@ -230,7 +289,7 @@ xfs_iget_cache_hit(
 
 		/* We've got a live one. */
 		spin_unlock(&ip->i_flags_lock);
-		read_unlock(&pag->pag_ici_lock);
+		rcu_read_unlock();
 		trace_xfs_iget_hit(ip);
 	}
 
@@ -244,7 +303,7 @@ xfs_iget_cache_hit(
 
 out_error:
 	spin_unlock(&ip->i_flags_lock);
-	read_unlock(&pag->pag_ici_lock);
+	rcu_read_unlock();
 	return error;
 }
 
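Every exit path from xfs_iget_cache_hit() now drops the RCU read lock
rather than pag_ici_lock, matching the __releases(RCU) annotation. The
EAGAIN from the reuse check is not fatal: the retry path at the bottom
of xfs_iget() (not visible in these hunks) backs off briefly and
restarts the lookup, roughly:

	out_error_or_again:
		if (error == EAGAIN) {
			delay(1);	/* let the racing free/realloc settle */
			goto again;
		}
		xfs_perag_put(pag);
		return error;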
@@ -297,7 +356,7 @@ xfs_iget_cache_miss(
 		BUG();
 	}
 
-	write_lock(&pag->pag_ici_lock);
+	spin_lock(&pag->pag_ici_lock);
 
 	/* insert the new inode */
 	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
@@ -312,14 +371,14 @@ xfs_iget_cache_miss(
 	ip->i_udquot = ip->i_gdquot = NULL;
 	xfs_iflags_set(ip, XFS_INEW);
 
-	write_unlock(&pag->pag_ici_lock);
+	spin_unlock(&pag->pag_ici_lock);
 	radix_tree_preload_end();
 
 	*ipp = ip;
 	return 0;
 
 out_preload_end:
-	write_unlock(&pag->pag_ici_lock);
+	spin_unlock(&pag->pag_ici_lock);
 	radix_tree_preload_end();
 	if (lock_flags)
 		xfs_iunlock(ip, lock_flags);
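With readers under RCU, pag->pag_ici_lock only needs to serialise
updaters, which is why the series demotes it from an rwlock to a
spinlock: writers take spin_lock() around radix tree modification and
lookups run lock-free. Condensed from the insertion path above:

	spin_lock(&pag->pag_ici_lock);
	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
	/* preloaded tree nodes let the insert run under the spinlock */
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();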
@@ -365,8 +424,8 @@ xfs_iget(
 	xfs_perag_t		*pag;
 	xfs_agino_t		agino;
 
-	/* the radix tree exists only in inode capable AGs */
-	if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
+	/* reject inode numbers outside existing AGs */
+	if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
 		return EINVAL;
 
 	/* get the perag structure and ensure that it's inode capable */
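Note the new !ino guard: with i_ino == 0 serving as the freed-inode
sentinel during the RCU grace period, inode number zero must be rejected
before it can ever be compared against a cache entry. Restated with a
comment (the rationale is drawn from the reuse-check comment in
xfs_iget_cache_hit() above, not stated in this hunk):

	/* 0 is the "freed inode" marker under RCU - never look it up */
	if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
		return EINVAL;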
@@ -375,15 +434,15 @@ xfs_iget(
 
 again:
 	error = 0;
-	read_lock(&pag->pag_ici_lock);
+	rcu_read_lock();
 	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
 
 	if (ip) {
-		error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
+		error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
 		if (error)
 			goto out_error_or_again;
 	} else {
-		read_unlock(&pag->pag_ici_lock);
+		rcu_read_unlock();
 		XFS_STATS_INC(xs_ig_missed);
 
 		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,