| author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-22 10:38:37 -0500 |
|---|---|---|
| committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-22 10:38:37 -0500 |
| commit | fcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 (patch) | |
| tree | a57612d1888735a2ec7972891b68c1ac5ec8faea /fs/xfs/xfs_iget.c | |
| parent | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (diff) | |

Diffstat (limited to 'fs/xfs/xfs_iget.c')

| -rw-r--r-- | fs/xfs/xfs_iget.c | 720 |

1 files changed, 720 insertions, 0 deletions

diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
new file mode 100644
index 00000000000..7759812c1bb
--- /dev/null
+++ b/fs/xfs/xfs_iget.c
@@ -0,0 +1,720 @@
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_acl.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_utils.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_trace.h"


/*
 * Define xfs inode iolock lockdep classes. We need to ensure that all active
 * inodes are considered the same for lockdep purposes, including inodes that
 * are recycled through the XFS_IRECLAIMABLE state. This is the only way to
 * guarantee the locks are considered the same when there are multiple lock
 * initialisation sites. Also, define a reclaimable inode class so it is
 * obvious in lockdep reports which class the report is against.
 */
static struct lock_class_key xfs_iolock_active;
struct lock_class_key xfs_iolock_reclaimable;

/*
 * Allocate and initialise an xfs_inode.
 */
STATIC struct xfs_inode *
xfs_inode_alloc(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	struct xfs_inode	*ip;

	/*
	 * if this didn't occur in transactions, we could use
	 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
	 * code up to do this anyway.
	 */
	ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
	if (!ip)
		return NULL;
	if (inode_init_always(mp->m_super, VFS_I(ip))) {
		kmem_zone_free(xfs_inode_zone, ip);
		return NULL;
	}

	ASSERT(atomic_read(&ip->i_iocount) == 0);
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(completion_done(&ip->i_flush));
	ASSERT(ip->i_ino == 0);

	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
	lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
			&xfs_iolock_active, "xfs_iolock_active");

	/* initialise the xfs inode */
	ip->i_ino = ino;
	ip->i_mount = mp;
	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
	ip->i_afp = NULL;
	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
	ip->i_flags = 0;
	ip->i_update_core = 0;
	ip->i_delayed_blks = 0;
	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
	ip->i_size = 0;
	ip->i_new_size = 0;

	return ip;
}

STATIC void
xfs_inode_free_callback(
	struct rcu_head		*head)
{
	struct inode		*inode = container_of(head, struct inode, i_rcu);
	struct xfs_inode	*ip = XFS_I(inode);

	INIT_LIST_HEAD(&inode->i_dentry);
	kmem_zone_free(xfs_inode_zone, ip);
}

void
xfs_inode_free(
	struct xfs_inode	*ip)
{
	switch (ip->i_d.di_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		xfs_idestroy_fork(ip, XFS_DATA_FORK);
		break;
	}

	if (ip->i_afp)
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	if (ip->i_itemp) {
		/*
		 * Only if we are shutting down the fs will we see an
		 * inode still in the AIL. If it is there, we should remove
		 * it to prevent a use-after-free from occurring.
		 */
		xfs_log_item_t	*lip = &ip->i_itemp->ili_item;
		struct xfs_ail	*ailp = lip->li_ailp;

		ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
				XFS_FORCED_SHUTDOWN(ip->i_mount));
		if (lip->li_flags & XFS_LI_IN_AIL) {
			spin_lock(&ailp->xa_lock);
			if (lip->li_flags & XFS_LI_IN_AIL)
				xfs_trans_ail_delete(ailp, lip);
			else
				spin_unlock(&ailp->xa_lock);
		}
		xfs_inode_item_destroy(ip);
		ip->i_itemp = NULL;
	}

	/* asserts to verify all state is correct here */
	ASSERT(atomic_read(&ip->i_iocount) == 0);
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(completion_done(&ip->i_flush));

	/*
	 * Because we use RCU freeing we need to ensure the inode always
	 * appears to be reclaimed with an invalid inode number when in the
	 * free state. The ip->i_flags_lock provides the barrier against lookup
	 * races.
	 */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;
	spin_unlock(&ip->i_flags_lock);

	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
}

/*
 * Check the validity of the inode we just found in the cache
 */
static int
xfs_iget_cache_hit(
	struct xfs_perag	*pag,
	struct xfs_inode	*ip,
	xfs_ino_t		ino,
	int			flags,
	int			lock_flags) __releases(RCU)
{
	struct inode		*inode = VFS_I(ip);
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/*
	 * check for re-use of an inode within an RCU grace period due to the
	 * radix tree nodes not being updated yet. We monitor for this by
	 * setting the inode number to zero before freeing the inode structure.
	 * If the inode has been reallocated and set up, then the inode number
	 * will not match, so check for that, too.
	 */
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != ino) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(xs_ig_frecycle);
		error = EAGAIN;
		goto out_error;
	}


	/*
	 * If we are racing with another cache hit that is currently
	 * instantiating this inode or currently recycling it out of
	 * reclaimable state, wait for the initialisation to complete
	 * before continuing.
	 *
	 * XXX(hch): eventually we should do something equivalent to
	 * wait_on_inode to wait for these flags to be cleared
	 * instead of polling for it.
	 */
	if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
		trace_xfs_iget_skip(ip);
		XFS_STATS_INC(xs_ig_frecycle);
		error = EAGAIN;
		goto out_error;
	}

	/*
	 * If lookup is racing with unlink return an error immediately.
	 */
	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
		error = ENOENT;
		goto out_error;
	}

	/*
	 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
	 * Need to carefully get it back into useable state.
	 */
	if (ip->i_flags & XFS_IRECLAIMABLE) {
		trace_xfs_iget_reclaim(ip);

		/*
		 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
		 * from stomping over us while we recycle the inode. We can't
		 * clear the radix tree reclaimable tag yet as it requires
		 * pag_ici_lock to be held exclusive.
		 */
		ip->i_flags |= XFS_IRECLAIM;

		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();

		error = -inode_init_always(mp->m_super, inode);
		if (error) {
			/*
			 * Re-initializing the inode failed, and we are in deep
			 * trouble.  Try to re-add it to the reclaim list.
			 */
			rcu_read_lock();
			spin_lock(&ip->i_flags_lock);

			ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
			ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
			trace_xfs_iget_reclaim_fail(ip);
			goto out_error;
		}

		spin_lock(&pag->pag_ici_lock);
		spin_lock(&ip->i_flags_lock);

		/*
		 * Clear the per-lifetime state in the inode as we are now
		 * effectively a new inode and need to return to the initial
		 * state before reuse occurs.
		 */
		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
		ip->i_flags |= XFS_INEW;
		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
		inode->i_state = I_NEW;

		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
		mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
		lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
				&xfs_iolock_active, "xfs_iolock_active");

		spin_unlock(&ip->i_flags_lock);
		spin_unlock(&pag->pag_ici_lock);
	} else {
		/* If the VFS inode is being torn down, pause and try again. */
		if (!igrab(inode)) {
			trace_xfs_iget_skip(ip);
			error = EAGAIN;
			goto out_error;
		}

		/* We've got a live one. */
		spin_unlock(&ip->i_flags_lock);
		rcu_read_unlock();
		trace_xfs_iget_hit(ip);
	}

	if (lock_flags != 0)
		xfs_ilock(ip, lock_flags);

	xfs_iflags_clear(ip, XFS_ISTALE);
	XFS_STATS_INC(xs_ig_found);

	return 0;

out_error:
	spin_unlock(&ip->i_flags_lock);
	rcu_read_unlock();
	return error;
}


static int
xfs_iget_cache_miss(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	xfs_trans_t		*tp,
	xfs_ino_t		ino,
	struct xfs_inode	**ipp,
	int			flags,
	int			lock_flags)
{
	struct xfs_inode	*ip;
	int			error;
	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ino);

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return ENOMEM;

	error = xfs_iread(mp, tp, ip, flags);
	if (error)
		goto out_destroy;

	trace_xfs_iget_miss(ip);

	if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
		error = ENOENT;
		goto out_destroy;
	}

	/*
	 * Preload the radix tree so we can insert safely under the
	 * write spinlock. Note that we cannot sleep inside the preload
	 * region.
	 */
	if (radix_tree_preload(GFP_KERNEL)) {
		error = EAGAIN;
		goto out_destroy;
	}

	/*
	 * Because the inode hasn't been added to the radix-tree yet it can't
	 * be found by another thread, so we can do the non-sleeping lock here.
	 */
	if (lock_flags) {
		if (!xfs_ilock_nowait(ip, lock_flags))
			BUG();
	}

	spin_lock(&pag->pag_ici_lock);

	/* insert the new inode */
	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
	if (unlikely(error)) {
		WARN_ON(error != -EEXIST);
		XFS_STATS_INC(xs_ig_dup);
		error = EAGAIN;
		goto out_preload_end;
	}

	/* These values _must_ be set before releasing the radix tree lock! */
	ip->i_udquot = ip->i_gdquot = NULL;
	xfs_iflags_set(ip, XFS_INEW);

	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();

	*ipp = ip;
	return 0;

out_preload_end:
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
out_destroy:
	__destroy_inode(VFS_I(ip));
	xfs_inode_free(ip);
	return error;
}

/*
 * Look up an inode by number in the given file system.
 * The inode is looked up in the cache held in each AG.
 * If the inode is found in the cache, initialise the vfs inode
 * if necessary.
 *
 * If it is not in core, read it in from the file system's device,
 * add it to the cache and initialise the vfs inode.
 *
 * The inode is locked according to the value of the lock_flags parameter.
 * This flag parameter indicates how and if the inode's IO lock and inode lock
 * should be taken.
 *
 * mp -- the mount point structure for the current file system.  It points
 *       to the inode hash table.
 * tp -- a pointer to the current transaction if there is one.  This is
 *       simply passed through to the xfs_iread() call.
 * ino -- the number of the inode desired.  This is the unique identifier
 *        within the file system for the inode being requested.
 * lock_flags -- flags indicating how to lock the inode.  See the comment
 *               for xfs_ilock() for a list of valid values.
 */
int
xfs_iget(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,
	xfs_inode_t	**ipp)
{
	xfs_inode_t	*ip;
	int		error;
	xfs_perag_t	*pag;
	xfs_agino_t	agino;

	/* reject inode numbers outside existing AGs */
	if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
		return EINVAL;

	/* get the perag structure and ensure that it's inode capable */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
	agino = XFS_INO_TO_AGINO(mp, ino);

again:
	error = 0;
	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);

	if (ip) {
		error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
		if (error)
			goto out_error_or_again;
	} else {
		rcu_read_unlock();
		XFS_STATS_INC(xs_ig_missed);

		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
							flags, lock_flags);
		if (error)
			goto out_error_or_again;
	}
	xfs_perag_put(pag);

	*ipp = ip;

	ASSERT(ip->i_df.if_ext_max ==
	       XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
	/*
	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
	 * now.  If it's a new inode being created, xfs_ialloc will handle it.
	 */
	if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
		xfs_setup_inode(ip);
	return 0;

out_error_or_again:
	if (error == EAGAIN) {
		delay(1);
		goto again;
	}
	xfs_perag_put(pag);
	return error;
}
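
/*
 * Illustrative sketch only -- not part of the original file.  A typical
 * xfs_iget() caller looks roughly like the function below: look the inode up
 * outside a transaction, use it under the lock that xfs_iget() took, then
 * drop the lock and the inode reference (commonly via the IRELE() wrapper in
 * xfs_inode.h).  The function name and the choice of reading i_size are ours,
 * purely for illustration.
 */
STATIC int
example_read_inode_size(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	xfs_fsize_t		*size)
{
	struct xfs_inode	*ip;
	int			error;

	/* no transaction (tp == NULL), take the inode lock shared */
	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip);
	if (error)
		return error;

	*size = ip->i_size;

	/* release the lock taken by xfs_iget(), then the inode reference */
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	iput(VFS_I(ip));
	return 0;
}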

/*
 * This is a wrapper routine around the xfs_ilock() routine
 * used to centralize some grungy code.  It is used in places
 * that wish to lock the inode solely for reading the extents.
 * The reason these places can't just call xfs_ilock(SHARED)
 * is that the inode lock also guards the bringing in of the
 * extents from disk for a file in b-tree format.  If the inode
 * is in b-tree format, then we need to lock the inode exclusively
 * until the extents are read in.  Locking it exclusively all
 * the time would limit our parallelism unnecessarily, though.
 * What we do instead is check to see if the extents have been
 * read in yet, and only lock the inode exclusively if they
 * have not.
 *
 * The function returns a value which should be given to the
 * corresponding xfs_iunlock_map_shared().  This value is
 * the mode in which the lock was actually taken.
 */
uint
xfs_ilock_map_shared(
	xfs_inode_t	*ip)
{
	uint	lock_mode;

	if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
	    ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
		lock_mode = XFS_ILOCK_EXCL;
	} else {
		lock_mode = XFS_ILOCK_SHARED;
	}

	xfs_ilock(ip, lock_mode);

	return lock_mode;
}

/*
 * This is simply the unlock routine to go with xfs_ilock_map_shared().
 * All it does is call xfs_iunlock() with the given lock_mode.
 */
void
xfs_iunlock_map_shared(
	xfs_inode_t	*ip,
	unsigned int	lock_mode)
{
	xfs_iunlock(ip, lock_mode);
}
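
/*
 * Illustrative sketch only -- not part of the original file.  The pair above
 * is used around read-only walks of the extent map; the mode returned by
 * xfs_ilock_map_shared() must be handed back to xfs_iunlock_map_shared()
 * because the lock may have been taken exclusive.  The function name here is
 * hypothetical.
 */
STATIC void
example_walk_extent_map(
	xfs_inode_t	*ip)
{
	uint		lock_mode;

	lock_mode = xfs_ilock_map_shared(ip);
	/* ... read-only use of the data fork extent map goes here ... */
	xfs_iunlock_map_shared(ip, lock_mode);
}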

/*
 * The xfs inode contains 2 locks: a multi-reader lock called the
 * i_iolock and a multi-reader lock called the i_lock.  This routine
 * allows either or both of the locks to be obtained.
 *
 * The 2 locks should always be ordered so that the IO lock is
 * obtained first in order to prevent deadlock.
 *
 * ip -- the inode being locked
 * lock_flags -- this parameter indicates the inode's locks
 *       to be locked.  It can be:
 *		XFS_IOLOCK_SHARED,
 *		XFS_IOLOCK_EXCL,
 *		XFS_ILOCK_SHARED,
 *		XFS_ILOCK_EXCL,
 *		XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
 *		XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
 *		XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
 *		XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
 */
void
xfs_ilock(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);

	if (lock_flags & XFS_IOLOCK_EXCL)
		mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));

	if (lock_flags & XFS_ILOCK_EXCL)
		mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
	else if (lock_flags & XFS_ILOCK_SHARED)
		mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));

	trace_xfs_ilock(ip, lock_flags, _RET_IP_);
}
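
/*
 * Illustrative sketch only -- not part of the original file.  A caller that
 * needs both locks takes them in a single xfs_ilock() call so the IO lock is
 * acquired before the inode lock, and releases them with the same flags.
 * The function name is hypothetical.
 */
STATIC void
example_lock_both(
	xfs_inode_t	*ip)
{
	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	/* ... modify the inode while holding both locks exclusive ... */
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
}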

/*
 * This is just like xfs_ilock(), except that the caller
 * is guaranteed not to sleep.  It returns 1 if it gets
 * the requested locks and 0 otherwise.  If the IO lock is
 * obtained but the inode lock cannot be, then the IO lock
 * is dropped before returning.
 *
 * ip -- the inode being locked
 * lock_flags -- this parameter indicates the inode's locks to be
 *       locked.  See the comment for xfs_ilock() for a list
 *       of valid values.
 */
int
xfs_ilock_nowait(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);

	if (lock_flags & XFS_IOLOCK_EXCL) {
		if (!mrtryupdate(&ip->i_iolock))
			goto out;
	} else if (lock_flags & XFS_IOLOCK_SHARED) {
		if (!mrtryaccess(&ip->i_iolock))
			goto out;
	}
	if (lock_flags & XFS_ILOCK_EXCL) {
		if (!mrtryupdate(&ip->i_lock))
			goto out_undo_iolock;
	} else if (lock_flags & XFS_ILOCK_SHARED) {
		if (!mrtryaccess(&ip->i_lock))
			goto out_undo_iolock;
	}
	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
	return 1;

out_undo_iolock:
	if (lock_flags & XFS_IOLOCK_EXCL)
		mrunlock_excl(&ip->i_iolock);
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mrunlock_shared(&ip->i_iolock);
out:
	return 0;
}
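
/*
 * Illustrative sketch only -- not part of the original file.  Non-blocking
 * callers (e.g. scanners that must not wait on a busy inode) try the lock
 * and simply skip the inode on contention.  The function name is
 * hypothetical.
 */
STATIC int
example_try_inode(
	xfs_inode_t	*ip)
{
	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
		return EAGAIN;	/* contended, let the caller retry later */

	/* ... work that only needs the inode lock held shared ... */

	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return 0;
}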

/*
 * xfs_iunlock() is used to drop the inode locks acquired with
 * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
 * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
 * that we know which locks to drop.
 *
 * ip -- the inode being unlocked
 * lock_flags -- this parameter indicates the inode's locks to be
 *       unlocked.  See the comment for xfs_ilock() for a list
 *       of valid values for this parameter.
 *
 */
void
xfs_iunlock(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY |
			XFS_LOCK_DEP_MASK)) == 0);
	ASSERT(lock_flags != 0);

	if (lock_flags & XFS_IOLOCK_EXCL)
		mrunlock_excl(&ip->i_iolock);
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mrunlock_shared(&ip->i_iolock);

	if (lock_flags & XFS_ILOCK_EXCL)
		mrunlock_excl(&ip->i_lock);
	else if (lock_flags & XFS_ILOCK_SHARED)
		mrunlock_shared(&ip->i_lock);

	if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) &&
	    !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) {
		/*
		 * Let the AIL know that this item has been unlocked in case
		 * it is in the AIL and anyone is waiting on it.  Don't do
		 * this if the caller has asked us not to.
		 */
		xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
					(xfs_log_item_t*)(ip->i_itemp));
	}
	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
}

/*
 * give up write locks.  the i/o lock cannot be held nested
 * if it is being demoted.
 */
void
xfs_ilock_demote(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);

	if (lock_flags & XFS_ILOCK_EXCL)
		mrdemote(&ip->i_lock);
	if (lock_flags & XFS_IOLOCK_EXCL)
		mrdemote(&ip->i_iolock);

	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
}
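
/*
 * Illustrative sketch only -- not part of the original file.  A caller that
 * needs exclusive access just for setup can demote to shared afterwards and
 * keep going without ever dropping the lock; the final unlock then uses the
 * shared flag.  The function name is hypothetical.
 */
STATIC void
example_demote_after_setup(
	xfs_inode_t	*ip)
{
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	/* ... work that needs the inode lock exclusive ... */
	xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
	/* ... continue under the shared lock; other readers may proceed ... */
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
}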

#ifdef DEBUG
int
xfs_isilocked(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
		if (!(lock_flags & XFS_ILOCK_SHARED))
			return !!ip->i_lock.mr_writer;
		return rwsem_is_locked(&ip->i_lock.mr_lock);
	}

	if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
		if (!(lock_flags & XFS_IOLOCK_SHARED))
			return !!ip->i_iolock.mr_writer;
		return rwsem_is_locked(&ip->i_iolock.mr_lock);
	}

	ASSERT(0);
	return 0;
}
#endif
