diff options
author | Christoph Hellwig <hch@infradead.org> | 2011-12-18 15:00:09 -0500 |
---|---|---|
committer | Ben Myers <bpm@sgi.com> | 2012-01-17 16:06:45 -0500 |
commit | 474fce067521a40dbacc722e8ba119e81c2d31bf (patch) | |
tree | fd923aa42a5304182e8a8c64ca5d130f9afef286 /fs | |
parent | 49e4c70e52a2bc2090e5a4e003e2888af21d6a2b (diff) |
xfs: replace i_flock with a sleeping bitlock
We almost never block on i_flock; the exception is synchronous inode
flushing. Instead of bloating the inode with a 16/24-byte completion
that we abuse as a semaphore, just implement it as a bitlock that uses
a bit waitqueue for the rare sleeping path. This is primarily a
tradeoff between a much smaller inode and a faster non-blocking
path vs. faster wakeups, and we are much better off with the former.
A small downside is that we will lose lockdep checking for i_flock, but
given that it's always taken inside the ilock that should be acceptable.
Note that for example the inode writeback locking is implemented in a
very similar way.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/xfs_iget.c | 20 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 78 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_item.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_sync.c | 9 |
6 files changed, 76 insertions, 46 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index f180ce896cd7..a7cf7139f9ad 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -77,7 +77,7 @@ xfs_inode_alloc( | |||
77 | 77 | ||
78 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 78 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
79 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 79 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
80 | ASSERT(completion_done(&ip->i_flush)); | 80 | ASSERT(!xfs_isiflocked(ip)); |
81 | ASSERT(ip->i_ino == 0); | 81 | ASSERT(ip->i_ino == 0); |
82 | 82 | ||
83 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 83 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
@@ -150,7 +150,7 @@ xfs_inode_free( | |||
150 | /* asserts to verify all state is correct here */ | 150 | /* asserts to verify all state is correct here */ |
151 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 151 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
152 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 152 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
153 | ASSERT(completion_done(&ip->i_flush)); | 153 | ASSERT(!xfs_isiflocked(ip)); |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * Because we use RCU freeing we need to ensure the inode always | 156 | * Because we use RCU freeing we need to ensure the inode always |
@@ -713,3 +713,19 @@ xfs_isilocked( | |||
713 | return 0; | 713 | return 0; |
714 | } | 714 | } |
715 | #endif | 715 | #endif |
716 | |||
717 | void | ||
718 | __xfs_iflock( | ||
719 | struct xfs_inode *ip) | ||
720 | { | ||
721 | wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT); | ||
722 | DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); | ||
723 | |||
724 | do { | ||
725 | prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); | ||
726 | if (xfs_isiflocked(ip)) | ||
727 | io_schedule(); | ||
728 | } while (!xfs_iflock_nowait(ip)); | ||
729 | |||
730 | finish_wait(wq, &wait.wait); | ||
731 | } | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 96b29e3286db..eeb60d31b086 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -2396,7 +2396,7 @@ xfs_iflush( | |||
2396 | XFS_STATS_INC(xs_iflush_count); | 2396 | XFS_STATS_INC(xs_iflush_count); |
2397 | 2397 | ||
2398 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 2398 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
2399 | ASSERT(!completion_done(&ip->i_flush)); | 2399 | ASSERT(xfs_isiflocked(ip)); |
2400 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 2400 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
2401 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); | 2401 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
2402 | 2402 | ||
@@ -2512,7 +2512,7 @@ xfs_iflush_int( | |||
2512 | #endif | 2512 | #endif |
2513 | 2513 | ||
2514 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 2514 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
2515 | ASSERT(!completion_done(&ip->i_flush)); | 2515 | ASSERT(xfs_isiflocked(ip)); |
2516 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 2516 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
2517 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); | 2517 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
2518 | 2518 | ||
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index be8dc0c2cf52..960d2a89b3ac 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -237,7 +237,6 @@ typedef struct xfs_inode { | |||
237 | struct xfs_inode_log_item *i_itemp; /* logging information */ | 237 | struct xfs_inode_log_item *i_itemp; /* logging information */ |
238 | mrlock_t i_lock; /* inode lock */ | 238 | mrlock_t i_lock; /* inode lock */ |
239 | mrlock_t i_iolock; /* inode IO lock */ | 239 | mrlock_t i_iolock; /* inode IO lock */ |
240 | struct completion i_flush; /* inode flush completion q */ | ||
241 | atomic_t i_pincount; /* inode pin count */ | 240 | atomic_t i_pincount; /* inode pin count */ |
242 | wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ | 241 | wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ |
243 | spinlock_t i_flags_lock; /* inode i_flags lock */ | 242 | spinlock_t i_flags_lock; /* inode i_flags lock */ |
@@ -324,6 +323,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) | |||
324 | return ret; | 323 | return ret; |
325 | } | 324 | } |
326 | 325 | ||
326 | static inline int | ||
327 | xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags) | ||
328 | { | ||
329 | int ret; | ||
330 | |||
331 | spin_lock(&ip->i_flags_lock); | ||
332 | ret = ip->i_flags & flags; | ||
333 | if (!ret) | ||
334 | ip->i_flags |= flags; | ||
335 | spin_unlock(&ip->i_flags_lock); | ||
336 | return ret; | ||
337 | } | ||
338 | |||
327 | /* | 339 | /* |
328 | * Project quota id helpers (previously projid was 16bit only | 340 | * Project quota id helpers (previously projid was 16bit only |
329 | * and using two 16bit values to hold new 32bit projid was chosen | 341 | * and using two 16bit values to hold new 32bit projid was chosen |
@@ -344,35 +356,17 @@ xfs_set_projid(struct xfs_inode *ip, | |||
344 | } | 356 | } |
345 | 357 | ||
346 | /* | 358 | /* |
347 | * Manage the i_flush queue embedded in the inode. This completion | ||
348 | * queue synchronizes processes attempting to flush the in-core | ||
349 | * inode back to disk. | ||
350 | */ | ||
351 | static inline void xfs_iflock(xfs_inode_t *ip) | ||
352 | { | ||
353 | wait_for_completion(&ip->i_flush); | ||
354 | } | ||
355 | |||
356 | static inline int xfs_iflock_nowait(xfs_inode_t *ip) | ||
357 | { | ||
358 | return try_wait_for_completion(&ip->i_flush); | ||
359 | } | ||
360 | |||
361 | static inline void xfs_ifunlock(xfs_inode_t *ip) | ||
362 | { | ||
363 | complete(&ip->i_flush); | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * In-core inode flags. | 359 | * In-core inode flags. |
368 | */ | 360 | */ |
369 | #define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ | 361 | #define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ |
370 | #define XFS_ISTALE 0x0002 /* inode has been staled */ | 362 | #define XFS_ISTALE (1 << 1) /* inode has been staled */ |
371 | #define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ | 363 | #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ |
372 | #define XFS_INEW 0x0008 /* inode has just been allocated */ | 364 | #define XFS_INEW (1 << 3) /* inode has just been allocated */ |
373 | #define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ | 365 | #define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */ |
374 | #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ | 366 | #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ |
375 | #define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ | 367 | #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ |
368 | #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ | ||
369 | #define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT) | ||
376 | 370 | ||
377 | /* | 371 | /* |
378 | * Per-lifetime flags need to be reset when re-using a reclaimable inode during | 372 | * Per-lifetime flags need to be reset when re-using a reclaimable inode during |
@@ -385,6 +379,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
385 | XFS_IFILESTREAM); | 379 | XFS_IFILESTREAM); |
386 | 380 | ||
387 | /* | 381 | /* |
382 | * Synchronize processes attempting to flush the in-core inode back to disk. | ||
383 | */ | ||
384 | |||
385 | extern void __xfs_iflock(struct xfs_inode *ip); | ||
386 | |||
387 | static inline int xfs_iflock_nowait(struct xfs_inode *ip) | ||
388 | { | ||
389 | return !xfs_iflags_test_and_set(ip, XFS_IFLOCK); | ||
390 | } | ||
391 | |||
392 | static inline void xfs_iflock(struct xfs_inode *ip) | ||
393 | { | ||
394 | if (!xfs_iflock_nowait(ip)) | ||
395 | __xfs_iflock(ip); | ||
396 | } | ||
397 | |||
398 | static inline void xfs_ifunlock(struct xfs_inode *ip) | ||
399 | { | ||
400 | xfs_iflags_clear(ip, XFS_IFLOCK); | ||
401 | wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); | ||
402 | } | ||
403 | |||
404 | static inline int xfs_isiflocked(struct xfs_inode *ip) | ||
405 | { | ||
406 | return xfs_iflags_test(ip, XFS_IFLOCK); | ||
407 | } | ||
408 | |||
409 | /* | ||
388 | * Flags for inode locking. | 410 | * Flags for inode locking. |
389 | * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) | 411 | * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) |
390 | * 1<<16 - 1<<32-1 -- lockdep annotation (integers) | 412 | * 1<<16 - 1<<32-1 -- lockdep annotation (integers) |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 2b6b4fcef49e..c8d4ce0efd5a 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -717,7 +717,7 @@ xfs_inode_item_pushbuf( | |||
717 | * If a flush is not in progress anymore, chances are that the | 717 | * If a flush is not in progress anymore, chances are that the |
718 | * inode was taken off the AIL. So, just get out. | 718 | * inode was taken off the AIL. So, just get out. |
719 | */ | 719 | */ |
720 | if (completion_done(&ip->i_flush) || | 720 | if (!xfs_isiflocked(ip) || |
721 | !(lip->li_flags & XFS_LI_IN_AIL)) { | 721 | !(lip->li_flags & XFS_LI_IN_AIL)) { |
722 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 722 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
723 | return true; | 723 | return true; |
@@ -750,7 +750,7 @@ xfs_inode_item_push( | |||
750 | struct xfs_inode *ip = iip->ili_inode; | 750 | struct xfs_inode *ip = iip->ili_inode; |
751 | 751 | ||
752 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | 752 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); |
753 | ASSERT(!completion_done(&ip->i_flush)); | 753 | ASSERT(xfs_isiflocked(ip)); |
754 | 754 | ||
755 | /* | 755 | /* |
756 | * Since we were able to lock the inode's flush lock and | 756 | * Since we were able to lock the inode's flush lock and |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 281961c1d81a..6851fa7b1afa 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -829,13 +829,6 @@ xfs_fs_inode_init_once( | |||
829 | atomic_set(&ip->i_pincount, 0); | 829 | atomic_set(&ip->i_pincount, 0); |
830 | spin_lock_init(&ip->i_flags_lock); | 830 | spin_lock_init(&ip->i_flags_lock); |
831 | init_waitqueue_head(&ip->i_ipin_wait); | 831 | init_waitqueue_head(&ip->i_ipin_wait); |
832 | /* | ||
833 | * Because we want to use a counting completion, complete | ||
834 | * the flush completion once to allow a single access to | ||
835 | * the flush completion without blocking. | ||
836 | */ | ||
837 | init_completion(&ip->i_flush); | ||
838 | complete(&ip->i_flush); | ||
839 | 832 | ||
840 | mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, | 833 | mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, |
841 | "xfsino", ip->i_ino); | 834 | "xfsino", ip->i_ino); |
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 72c01a1c16e7..40b75eecd2b4 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
@@ -707,14 +707,13 @@ xfs_reclaim_inode_grab( | |||
707 | return 1; | 707 | return 1; |
708 | 708 | ||
709 | /* | 709 | /* |
710 | * do some unlocked checks first to avoid unnecessary lock traffic. | 710 | * If we are asked for non-blocking operation, do unlocked checks to |
711 | * The first is a flush lock check, the second is a already in reclaim | 711 | * see if the inode already is being flushed or in reclaim to avoid |
712 | * check. Only do these checks if we are not going to block on locks. | 712 | * lock traffic. |
713 | */ | 713 | */ |
714 | if ((flags & SYNC_TRYLOCK) && | 714 | if ((flags & SYNC_TRYLOCK) && |
715 | (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { | 715 | __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM)) |
716 | return 1; | 716 | return 1; |
717 | } | ||
718 | 717 | ||
719 | /* | 718 | /* |
720 | * The radix tree lock here protects a thread in xfs_iget from racing | 719 | * The radix tree lock here protects a thread in xfs_iget from racing |