aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Christoph Hellwig <hch@infradead.org>, 2011-12-18 15:00:09 -0500
committer: Ben Myers <bpm@sgi.com>, 2012-01-17 16:06:45 -0500
commit: 474fce067521a40dbacc722e8ba119e81c2d31bf (patch)
tree: fd923aa42a5304182e8a8c64ca5d130f9afef286 /fs
parent: 49e4c70e52a2bc2090e5a4e003e2888af21d6a2b (diff)
xfs: replace i_flock with a sleeping bitlock
We almost never block on i_flock, the exception is synchronous inode flushing. Instead of bloating the inode with a 16/24-byte completion that we abuse as a semaphore, just implement it as a bitlock that uses a bit waitqueue for the rare sleeping path. This primarily is a tradeoff between a much smaller inode and a faster non-blocking path vs faster wakeups, and we are much better off with the former.

A small downside is that we will lose lockdep checking for i_flock, but given that it's always taken inside the ilock that should be acceptable.

Note that, for example, the inode writeback locking is implemented in a very similar way.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r--  fs/xfs/xfs_iget.c        20
-rw-r--r--  fs/xfs/xfs_inode.c        4
-rw-r--r--  fs/xfs/xfs_inode.h       78
-rw-r--r--  fs/xfs/xfs_inode_item.c   4
-rw-r--r--  fs/xfs/xfs_super.c        7
-rw-r--r--  fs/xfs/xfs_sync.c         9
6 files changed, 76 insertions, 46 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index f180ce896cd7..a7cf7139f9ad 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -77,7 +77,7 @@ xfs_inode_alloc(
77 77
78 ASSERT(atomic_read(&ip->i_pincount) == 0); 78 ASSERT(atomic_read(&ip->i_pincount) == 0);
79 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 79 ASSERT(!spin_is_locked(&ip->i_flags_lock));
80 ASSERT(completion_done(&ip->i_flush)); 80 ASSERT(!xfs_isiflocked(ip));
81 ASSERT(ip->i_ino == 0); 81 ASSERT(ip->i_ino == 0);
82 82
83 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 83 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
@@ -150,7 +150,7 @@ xfs_inode_free(
150 /* asserts to verify all state is correct here */ 150 /* asserts to verify all state is correct here */
151 ASSERT(atomic_read(&ip->i_pincount) == 0); 151 ASSERT(atomic_read(&ip->i_pincount) == 0);
152 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 152 ASSERT(!spin_is_locked(&ip->i_flags_lock));
153 ASSERT(completion_done(&ip->i_flush)); 153 ASSERT(!xfs_isiflocked(ip));
154 154
155 /* 155 /*
156 * Because we use RCU freeing we need to ensure the inode always 156 * Because we use RCU freeing we need to ensure the inode always
@@ -713,3 +713,19 @@ xfs_isilocked(
713 return 0; 713 return 0;
714} 714}
715#endif 715#endif
716
717void
718__xfs_iflock(
719 struct xfs_inode *ip)
720{
721 wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
722 DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
723
724 do {
725 prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
726 if (xfs_isiflocked(ip))
727 io_schedule();
728 } while (!xfs_iflock_nowait(ip));
729
730 finish_wait(wq, &wait.wait);
731}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 96b29e3286db..eeb60d31b086 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2396,7 +2396,7 @@ xfs_iflush(
2396 XFS_STATS_INC(xs_iflush_count); 2396 XFS_STATS_INC(xs_iflush_count);
2397 2397
2398 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2398 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2399 ASSERT(!completion_done(&ip->i_flush)); 2399 ASSERT(xfs_isiflocked(ip));
2400 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 2400 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
2401 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 2401 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
2402 2402
@@ -2512,7 +2512,7 @@ xfs_iflush_int(
2512#endif 2512#endif
2513 2513
2514 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2514 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2515 ASSERT(!completion_done(&ip->i_flush)); 2515 ASSERT(xfs_isiflocked(ip));
2516 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 2516 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
2517 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 2517 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
2518 2518
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index be8dc0c2cf52..960d2a89b3ac 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -237,7 +237,6 @@ typedef struct xfs_inode {
237 struct xfs_inode_log_item *i_itemp; /* logging information */ 237 struct xfs_inode_log_item *i_itemp; /* logging information */
238 mrlock_t i_lock; /* inode lock */ 238 mrlock_t i_lock; /* inode lock */
239 mrlock_t i_iolock; /* inode IO lock */ 239 mrlock_t i_iolock; /* inode IO lock */
240 struct completion i_flush; /* inode flush completion q */
241 atomic_t i_pincount; /* inode pin count */ 240 atomic_t i_pincount; /* inode pin count */
242 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ 241 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
243 spinlock_t i_flags_lock; /* inode i_flags lock */ 242 spinlock_t i_flags_lock; /* inode i_flags lock */
@@ -324,6 +323,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
324 return ret; 323 return ret;
325} 324}
326 325
326static inline int
327xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
328{
329 int ret;
330
331 spin_lock(&ip->i_flags_lock);
332 ret = ip->i_flags & flags;
333 if (!ret)
334 ip->i_flags |= flags;
335 spin_unlock(&ip->i_flags_lock);
336 return ret;
337}
338
327/* 339/*
328 * Project quota id helpers (previously projid was 16bit only 340 * Project quota id helpers (previously projid was 16bit only
329 * and using two 16bit values to hold new 32bit projid was chosen 341 * and using two 16bit values to hold new 32bit projid was chosen
@@ -344,35 +356,17 @@ xfs_set_projid(struct xfs_inode *ip,
344} 356}
345 357
346/* 358/*
347 * Manage the i_flush queue embedded in the inode. This completion
348 * queue synchronizes processes attempting to flush the in-core
349 * inode back to disk.
350 */
351static inline void xfs_iflock(xfs_inode_t *ip)
352{
353 wait_for_completion(&ip->i_flush);
354}
355
356static inline int xfs_iflock_nowait(xfs_inode_t *ip)
357{
358 return try_wait_for_completion(&ip->i_flush);
359}
360
361static inline void xfs_ifunlock(xfs_inode_t *ip)
362{
363 complete(&ip->i_flush);
364}
365
366/*
367 * In-core inode flags. 359 * In-core inode flags.
368 */ 360 */
369#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ 361#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
370#define XFS_ISTALE 0x0002 /* inode has been staled */ 362#define XFS_ISTALE (1 << 1) /* inode has been staled */
371#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ 363#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
372#define XFS_INEW 0x0008 /* inode has just been allocated */ 364#define XFS_INEW (1 << 3) /* inode has just been allocated */
373#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ 365#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */
374#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ 366#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
375#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ 367#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
368#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
369#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
376 370
377/* 371/*
378 * Per-lifetime flags need to be reset when re-using a reclaimable inode during 372 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
@@ -385,6 +379,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
385 XFS_IFILESTREAM); 379 XFS_IFILESTREAM);
386 380
387/* 381/*
382 * Synchronize processes attempting to flush the in-core inode back to disk.
383 */
384
385extern void __xfs_iflock(struct xfs_inode *ip);
386
387static inline int xfs_iflock_nowait(struct xfs_inode *ip)
388{
389 return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
390}
391
/*
 * Take the inode flush lock.
 *
 * The non-blocking attempt is made first; only when that fails does the
 * call fall through to the out-of-line __xfs_iflock() path.
 */
static inline void xfs_iflock(struct xfs_inode *ip)
{
	if (xfs_iflock_nowait(ip))
		return;

	__xfs_iflock(ip);
}
397
398static inline void xfs_ifunlock(struct xfs_inode *ip)
399{
400 xfs_iflags_clear(ip, XFS_IFLOCK);
401 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
402}
403
404static inline int xfs_isiflocked(struct xfs_inode *ip)
405{
406 return xfs_iflags_test(ip, XFS_IFLOCK);
407}
408
409/*
388 * Flags for inode locking. 410 * Flags for inode locking.
389 * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) 411 * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
390 * 1<<16 - 1<<32-1 -- lockdep annotation (integers) 412 * 1<<16 - 1<<32-1 -- lockdep annotation (integers)
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 2b6b4fcef49e..c8d4ce0efd5a 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -717,7 +717,7 @@ xfs_inode_item_pushbuf(
717 * If a flush is not in progress anymore, chances are that the 717 * If a flush is not in progress anymore, chances are that the
718 * inode was taken off the AIL. So, just get out. 718 * inode was taken off the AIL. So, just get out.
719 */ 719 */
720 if (completion_done(&ip->i_flush) || 720 if (!xfs_isiflocked(ip) ||
721 !(lip->li_flags & XFS_LI_IN_AIL)) { 721 !(lip->li_flags & XFS_LI_IN_AIL)) {
722 xfs_iunlock(ip, XFS_ILOCK_SHARED); 722 xfs_iunlock(ip, XFS_ILOCK_SHARED);
723 return true; 723 return true;
@@ -750,7 +750,7 @@ xfs_inode_item_push(
750 struct xfs_inode *ip = iip->ili_inode; 750 struct xfs_inode *ip = iip->ili_inode;
751 751
752 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 752 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
753 ASSERT(!completion_done(&ip->i_flush)); 753 ASSERT(xfs_isiflocked(ip));
754 754
755 /* 755 /*
756 * Since we were able to lock the inode's flush lock and 756 * Since we were able to lock the inode's flush lock and
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 281961c1d81a..6851fa7b1afa 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -829,13 +829,6 @@ xfs_fs_inode_init_once(
829 atomic_set(&ip->i_pincount, 0); 829 atomic_set(&ip->i_pincount, 0);
830 spin_lock_init(&ip->i_flags_lock); 830 spin_lock_init(&ip->i_flags_lock);
831 init_waitqueue_head(&ip->i_ipin_wait); 831 init_waitqueue_head(&ip->i_ipin_wait);
832 /*
833 * Because we want to use a counting completion, complete
834 * the flush completion once to allow a single access to
835 * the flush completion without blocking.
836 */
837 init_completion(&ip->i_flush);
838 complete(&ip->i_flush);
839 832
840 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 833 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
841 "xfsino", ip->i_ino); 834 "xfsino", ip->i_ino);
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 72c01a1c16e7..40b75eecd2b4 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -707,14 +707,13 @@ xfs_reclaim_inode_grab(
707 return 1; 707 return 1;
708 708
709 /* 709 /*
710 * do some unlocked checks first to avoid unnecessary lock traffic. 710 * If we are asked for non-blocking operation, do unlocked checks to
711 * The first is a flush lock check, the second is a already in reclaim 711 * see if the inode already is being flushed or in reclaim to avoid
712 * check. Only do these checks if we are not going to block on locks. 712 * lock traffic.
713 */ 713 */
714 if ((flags & SYNC_TRYLOCK) && 714 if ((flags & SYNC_TRYLOCK) &&
715 (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { 715 __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
716 return 1; 716 return 1;
717 }
718 717
719 /* 718 /*
720 * The radix tree lock here protects a thread in xfs_iget from racing 719 * The radix tree lock here protects a thread in xfs_iget from racing